Sched-tools

sched-tools@lists.linaro.org

94 discussions

[PATCH] rt-app: add a temporary log buffer in memory

by Vincent Guittot

With the new mechanism that is used to describe a scenario, it has become more complex to estimate how many logs will be generated during the execution of a use case. As a result, the recording of the logs in a temporary buffer had been disabled and the logs were saved directly in files. The potential side effect of such a mechanism is to block threads on I/O access, which disturbs the use case behavior. A new parameter is added to define the saving policy of the logs. You can now disable the logs, save them directly in a file as before, or define the size of a temporary buffer, at the risk of losing some logs if the buffer overflows. You can also ask rt-app to evaluate how many logs will be generated and allocate the temporary buffer accordingly. This last mode is not yet implemented and will be part of a dedicated patch. Full details of the new parameter are available in the documentation update. Signed-off-by: Vincent Guittot <vincent.guittot(a)linaro.org> --- doc/tutorial.txt | 15 ++++++++++++++ src/rt-app.c | 34 +++++++++++++++++++++++-------- src/rt-app_parse_config.c | 51 ++++++++++++++++++++++++++++++++++++++--------- src/rt-app_types.h | 1 + 4 files changed, 84 insertions(+), 17 deletions(-) diff --git a/doc/tutorial.txt b/doc/tutorial.txt index 12aba47..8341b2e 100644 --- a/doc/tutorial.txt +++ b/doc/tutorial.txt @@ -61,6 +61,20 @@ the current directory (./). * log_basename : Text. Prefix used for all log files of the use case. "rt-app-" is used by default. +* log_size : String or Integer. A Integer defines a fix size in MB of the +temporary buffer (size per thread) that will be used to store the log data +before saving them in a file. This temporary buffer is used as a cicular +buffer so the oldest data will be lost in case of overflow. A string is used +to set a predifined behavior: + - "file" will be used to store the log data directly in the file without + using a temporary buffer. + - "Disable" will disable the log mecahnism. 
+ - "Auto" will let rt-app compute the buffer size to not overflow the latter + during the use case. +The use of a temporary buffer prevents the threads of unexpected wait during +io access. The "Auto" mode is not implemented yet and fallback to "file" mode +for the moment. + * ftrace: Boolean. If enable, rt-app logs in ftrace the main events of the use case. Default value is False. @@ -75,6 +89,7 @@ each threads (see gnuplot section for more details). Default value is False. "pi_enabled" : false, "lock_pages" : false, "logdir" : "./", + "log_size" : "file", "log_basename" : "rt-app", "ftrace" : false, "gnuplot" : false, diff --git a/src/rt-app.c b/src/rt-app.c index 6a27308..805cc35 100644 --- a/src/rt-app.c +++ b/src/rt-app.c @@ -391,10 +391,11 @@ void *thread_body(void *arg) timing_point_t *curr_timing; timing_point_t *timings; timing_point_t tmp_timing; + unsigned int timings_size, timing_loop; pid_t tid; struct sched_attr attr; unsigned int flags = 0; - int ret, i, j, loop; + int ret, i, j, loop, idx; /* Set thread name */ ret = pthread_setname_np(pthread_self(), data->name); @@ -488,6 +489,15 @@ void *thread_body(void *arg) exit(EXIT_FAILURE); } + if (opts.logsize > 0) { + timings = malloc(opts.logsize); + timings_size = opts.logsize / sizeof(timing_point_t); + } else { + timings = NULL; + timings_size = 0; + } + timing_loop = 0; + /* Lock pages */ if (data->lock_pages == 1) { @@ -502,8 +512,6 @@ void *thread_body(void *arg) log_notice("[%d] starting thread ...\n", data->ind); - timings = NULL; - fprintf(data->log_handler, "#idx\tperf\trun\tperiod\tstart\t\tend\t\trel_st\n"); if (opts.ftrace) @@ -526,7 +534,7 @@ void *thread_body(void *arg) } } #endif - i = j = loop = 0; + i = j = loop = idx = 0; while (continue_running && (i != data->loop)) { struct timespec t_diff, t_rel_start; @@ -541,7 +549,7 @@ void *thread_body(void *arg) clock_gettime(CLOCK_MONOTONIC, &t_end); if (timings) - curr_timing = &timings[loop]; + curr_timing = &timings[idx]; else curr_timing 
= &tmp_timing; @@ -556,7 +564,7 @@ void *thread_body(void *arg) curr_timing->duration = duration; curr_timing->perf = perf; - if (!timings) + if (opts.logsize && !timings) log_timing(data->log_handler, curr_timing); if (opts.ftrace) @@ -575,6 +583,12 @@ void *thread_body(void *arg) pdata = &data->phases[j]; } + + idx++; + if (idx >= timings_size) { + timing_loop = 1; + idx = 0; + } } param.sched_priority = 0; @@ -587,9 +601,13 @@ void *thread_body(void *arg) exit(EXIT_FAILURE); } - if (timings) - for (j=0; j < loop; j++) + if (timings) { + for (j = idx; timing_loop && (j < timings_size); j++) + log_timing(data->log_handler, &timings[j]); + for (j = 0; j < idx; j++) log_timing(data->log_handler, &timings[j]); + } + if (opts.ftrace) log_ftrace(ft_data.marker_fd, "[%d] exiting", data->ind); diff --git a/src/rt-app_parse_config.c b/src/rt-app_parse_config.c index def3070..98374cd 100644 --- a/src/rt-app_parse_config.c +++ b/src/rt-app_parse_config.c @@ -744,8 +744,8 @@ parse_tasks(struct json_object *tasks, rtapp_options_t *opts) static void parse_global(struct json_object *global, rtapp_options_t *opts) { - char *policy, *cal_str; - struct json_object *cal_obj; + char *policy, *tmp_str; + struct json_object *tmp_obj; int scan_cnt; log_info(PFX "Parsing global section"); @@ -760,6 +760,7 @@ parse_global(struct json_object *global, rtapp_options_t *opts) opts->logdir = strdup("./"); opts->lock_pages = 1; opts->logbasename = strdup("rt-app"); + opts->logsize = 0; opts->ftrace = 0; opts->pi_enabled = 0; opts->io_device = strdup("/dev/null"); @@ -781,27 +782,27 @@ parse_global(struct json_object *global, rtapp_options_t *opts) */ free(policy); - cal_obj = get_in_object(global, "calibration", TRUE); - if (cal_obj == NULL) { + tmp_obj = get_in_object(global, "calibration", TRUE); + if (tmp_obj == NULL) { /* no setting ? 
Calibrate CPU0 */ opts->calib_cpu = 0; opts->calib_ns_per_loop = 0; log_error("missing calibration setting force CPU0"); } else { - if (json_object_is_type(cal_obj, json_type_int)) { + if (json_object_is_type(tmp_obj, json_type_int)) { /* integer (no " ") detected. */ - opts->calib_ns_per_loop = json_object_get_int(cal_obj); + opts->calib_ns_per_loop = json_object_get_int(tmp_obj); log_debug("ns_per_loop %d", opts->calib_ns_per_loop); } else { /* Get CPU number */ - cal_str = get_string_value_from(global, "calibration", + tmp_str = get_string_value_from(global, "calibration", TRUE, "CPU0"); - scan_cnt = sscanf(cal_str, "CPU%d", &opts->calib_cpu); + scan_cnt = sscanf(tmp_str, "CPU%d", &opts->calib_cpu); /* * get_string_value_from allocate the string so with have to free it * once useless */ - free(cal_str); + free(tmp_str); if (!scan_cnt) { log_critical(PFX "Invalid calibration CPU%d", opts->calib_cpu); exit(EXIT_INV_CONFIG); @@ -810,6 +811,38 @@ parse_global(struct json_object *global, rtapp_options_t *opts) } } + tmp_obj = get_in_object(global, "log_size", TRUE); + if (tmp_obj == NULL) { + /* no size ? use file system */ + opts->logsize = -2; + } else { + if (json_object_is_type(tmp_obj, json_type_int)) { + /* integer (no " ") detected. 
*/ + /* buffer size is set in MB */ + opts->logsize = json_object_get_int(tmp_obj) << 20; + log_notice("Log buffer size fixed to %dMB per threads", (opts->logsize >> 20)); + } else { + /* Get CPU number */ + tmp_str = get_string_value_from(global, "log_size", + TRUE, "disable"); + + if (strcmp(tmp_str, "disable")) + opts->logsize = 0; + else if (strcmp(tmp_str, "file")) + opts->logsize = -2; + else if (strcmp(tmp_str, "auto")) + opts->logsize = -2; /* Automatic buffer size computation is not supported yet so we fall back on file system mode */ + + log_debug("Log buffer set to %s mode", tmp_str); + + /* + * get_string_value_from allocate the string so with have to free it + * once useless + */ + free(tmp_str); + } + } + opts->logdir = get_string_value_from(global, "logdir", TRUE, "./"); opts->lock_pages = get_bool_value_from(global, "lock_pages", TRUE, 1); opts->logbasename = get_string_value_from(global, "log_basename", diff --git a/src/rt-app_types.h b/src/rt-app_types.h index aab96a7..2ce9c9d 100644 --- a/src/rt-app_types.h +++ b/src/rt-app_types.h @@ -170,6 +170,7 @@ typedef struct _rtapp_options_t { char *logdir; char *logbasename; + int logsize; int gnuplot; int calib_cpu; int calib_ns_per_loop; -- 1.9.1

10 years, 6 months

[PATCH] rt-app: update spreading tasks example

by Vincent Guittot

Update the spreading-tasks.json example in order to test all possible transition sequences between light and heavy loads of 2 threads. Signed-off-by: Vincent Guittot <vincent.guittot(a)linaro.org> --- doc/examples/spreading-tasks.json | 34 ++++++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/doc/examples/spreading-tasks.json b/doc/examples/spreading-tasks.json index 844db44..be78118 100644 --- a/doc/examples/spreading-tasks.json +++ b/doc/examples/spreading-tasks.json @@ -1,20 +1,46 @@ { "tasks" : { - "thread" : { - "instance" : 2, + "thread1" : { + "instance" : 1, "loop" : -1, "phases" : { "light" : { - "loop" : 600, + "loop" : 300, "run" : 1000, "timer" : { "ref" : "unique", "period" : 10000 } }, "heavy" : { - "loop" : 600, + "loop" : 300, "run" : 7000, "timer" : { "ref" : "unique", "period" : 10000 } } } + }, + "thread2" : { + "instance" : 1, + "loop" : -1, + "phases" : { + "light1" : { + "loop" : 900, + "run" : 1000, + "timer" : { "ref" : "unique", "period" : 10000 } + }, + "heavy1" : { + "loop" : 600, + "run" : 7000, + "timer" : { "ref" : "unique", "period" : 10000 } + }, + "light2" : { + "loop" : 300, + "run" : 1000, + "timer" : { "ref" : "unique", "period" : 10000 } + }, + "heavy1" : { + "loop" : 600, + "run" : 7000, + "timer" : { "ref" : "unique", "period" : 10000 } + }, + } } }, "global" : { -- 1.9.1

10 years, 6 months

[PATCH] rt-app: reset timer that overruns

by Vincent Guittot

If the next wake-up date of the timer is already in the past when we execute the event, we reset timer.t_next to the current time. Otherwise, if the delay has been too large, we might never recover it and the timer will always be in the past. Signed-off-by: Vincent Guittot <vincent.guittot(a)linaro.org> --- src/rt-app.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/rt-app.c b/src/rt-app.c index 37e9892..20383a2 100644 --- a/src/rt-app.c +++ b/src/rt-app.c @@ -253,6 +253,8 @@ static int run_event(event_data_t *event, int dry_run, clock_gettime(CLOCK_MONOTONIC, &t_now); if (timespec_lower(&t_now, &rdata->res.timer.t_next)) clock_nanosleep(CLOCK_MONOTONIC, TIMER_ABSTIME, &rdata->res.timer.t_next, NULL); + else + clock_gettime(CLOCK_MONOTONIC, &rdata->res.timer.t_next); } break; case rtapp_suspend: -- 1.9.1

10 years, 6 months

Fwd: rt-app cross-compile issue

by $rik＠nth

Hi Linaro Sched team, I am trying to cross-compile rt-app for ARM Android. I've followed the steps mentioned in the Readme file, but I am facing some issues while configuring it for ARM. Attached is the config log. Commands issued: export CC=arm-linux-gnueabi-gcc ./configure --host=arm Also, I am using the following package: http://packages.ubuntu.com/trusty/libjson-c-dev -- Thanks & Regards, M.Srikanth Kumar.

10 years, 6 months

[RESEND PATCH v3] doc: measure the efficiency of cpufreq governors

by Xunlei Pang

From: Xunlei Pang <pang.xunlei(a)linaro.org> DVFS adds latency to the execution of a task because of the time it takes to decide to move to max freq. We need to measure this latency and check that the governor stays in an acceptable range. When rt-app runs a json file, a log file is created for each thread. This log file records the number of loops that have been executed and the duration for executing these loops (per phase). We can use these figures to evaluate the latency that is added by a cpufreq governor and its "performance efficiency". We use the run+sleep pattern to do the measurement. For the run time per loop, the performance governor should run for the expected duration as the CPU stays at max freq. Conversely, the powersave governor will give us the longest duration (as it stays at the lowest OPP). Other governors will be somewhere between the 2 previous durations, as they will use several OPPs and will go back to max frequency after a defined duration which depends on their monitoring period. The formula: $\text{efficiency} = \frac{\text{duration of powersave gov} - \text{duration of the gov}}{\text{duration of powersave gov} - \text{duration of performance gov}} \times 100\%$ will give the efficiency of the governor. 100% means as efficient as the perf governor and 0% means as efficient as the powersave governor. This patch offers json files and shell scripts to do the measurement. Usage: ./calibration.sh <cpu> cpu: cpu number on which you want to run the test ./test.sh <governor> <cpu> <runtime> <sleeptime> [<loops>] governor: target CPUFreq governor you want to test cpu: cpu number on which you want to run the test. Must be the same as the one passed to "calibration.sh". runtime: running time in ms per loop of the workload pattern sleeptime: sleeping time in ms per loop of the workload pattern loops: number of repetitions of the workload pattern. default: 10 Example: "./calibration.sh 0" means to calculate the computing capacity of CPU0 which will be used in the following test. 
"./test.sh ondemand 0 100 100 20" means to test "ondemand" on CPU0 with workload pattern "run 100ms + sleep 100ms"(20 loops). NOTE: - Make sure there are "sed", "cut", "grep", "rt-app", etc tools on your test machine, and run the scripts under root privilege. - Run the test while the system is idle. - You can change the target governor's parameters after running "calibration.sh", but before "test.sh". Signed-off-by: Xunlei Pang <pang.xunlei(a)linaro.org> --- doc/examples/cpufreq_governor_efficiency/README | 60 ++++++++++++ .../cpufreq_governor_efficiency/calibration.json | 26 ++++++ .../cpufreq_governor_efficiency/calibration.sh | 17 ++++ doc/examples/cpufreq_governor_efficiency/dvfs.json | 27 ++++++ doc/examples/cpufreq_governor_efficiency/dvfs.sh | 38 ++++++++ doc/examples/cpufreq_governor_efficiency/test.sh | 104 +++++++++++++++++++++ 6 files changed, 272 insertions(+) create mode 100755 doc/examples/cpufreq_governor_efficiency/README create mode 100755 doc/examples/cpufreq_governor_efficiency/calibration.json create mode 100755 doc/examples/cpufreq_governor_efficiency/calibration.sh create mode 100755 doc/examples/cpufreq_governor_efficiency/dvfs.json create mode 100755 doc/examples/cpufreq_governor_efficiency/dvfs.sh create mode 100755 doc/examples/cpufreq_governor_efficiency/test.sh diff --git a/doc/examples/cpufreq_governor_efficiency/README b/doc/examples/cpufreq_governor_efficiency/README new file mode 100755 index 0000000..10482b8 --- /dev/null +++ b/doc/examples/cpufreq_governor_efficiency/README @@ -0,0 +1,60 @@ +Measure the efficiency of cpufreq governors using rt-app + +BACKGROUND: + DVFS adds a latency in the execution of task because of the time to + decide to move at max freq. We need to measure this latency and check + that the governor stays in an acceptable range. + + When rt-app runs a json file, a log file is created for each thread. 
+ This log file records the number of loop that has been executed and + the duration for executing these loops (per phase). We can use these + figures to evaluate to latency that is added by a cpufreq governor + and its "performance efficiency". + + We use the run+sleep pattern to do the measurement, for the run time per + loop, the performance governor should run the expected duration as the + CPU stays a max freq. At the opposite, the powersave governor will give + use the longest duration (as it stays at lowest OPP). Other governor will + be somewhere between the 2 previous duration as they will use several OPP + and will go back to max frequency after a defined duration which depends + on its monitoring period. + + The formula: + + duration of powersave gov - duration of the gov + -------------------------------------------------------- x 100% + duration of powersave gov - duration of performance gov + + will give the efficiency of the governor. 100% means as efficient as + the perf governor and 0% means as efficient as the powersave governor. + + This test offers json files and shell scripts to do the measurement. + +Usage: + ./calibration.sh <cpu> + cpu: cpu number on which you want to run the test + + ./test.sh <governor> <cpu> <runtime> <sleeptime> [<loops>] + governor: target CPUFreq governor you want to test + cpu: cpu number on which you want to run the test. Be the same + as the one passing to "calibration.sh". + runtime: running time in ms per loop of the workload pattern + sleeptime: sleeping time in ms per loop of the workload pattern + loops: repeat times of the workload pattern. default: 10 + +Example: + "./calibration.sh 0" means to calculate the computing capacity of CPU0 which + will be used in the following test. + + "./test.sh ondemand 0 100 100 20" means to + test "ondemand" on CPU0 with workload pattern "run 100ms + sleep 100ms"(20 loops). 
+ +NOTE: + - Make sure there are "sed", "cut", "grep", "rt-app", etc tools on + your test machine, and run the scripts under root privilege. + + - Run the test while the system is idle. + + - You can change the target governor's parameters after running "calibration.sh", + but before "test.sh". + diff --git a/doc/examples/cpufreq_governor_efficiency/calibration.json b/doc/examples/cpufreq_governor_efficiency/calibration.json new file mode 100755 index 0000000..2d5870c --- /dev/null +++ b/doc/examples/cpufreq_governor_efficiency/calibration.json @@ -0,0 +1,26 @@ +{ + "tasks" : { + "thread" : { + "instance" : 1, + "loop" : 1, + "phases" : { + "run" : { + "loop" : 1, + "run" : 2000, + }, + "sleep" : { + "loop" : 1, + "sleep" : 2000, + } + } + } + }, + "global" : { + "default_policy" : "SCHED_FIFO", + "calibration" : "CPU0", + "lock_pages" : true, + "ftrace" : false, + "logdir" : "./", + } +} + diff --git a/doc/examples/cpufreq_governor_efficiency/calibration.sh b/doc/examples/cpufreq_governor_efficiency/calibration.sh new file mode 100755 index 0000000..a217487 --- /dev/null +++ b/doc/examples/cpufreq_governor_efficiency/calibration.sh @@ -0,0 +1,17 @@ +#!/bin/sh + +set -e + +if [ ! 
$1 ] ; then + echo "Please input one cpu" + exit +fi + +echo performance > /sys/devices/system/cpu/cpu$1/cpufreq/scaling_governor + +sleep 1 + +sed 's/"calibration" : "CPU.*",/"calibration" : "CPU'$1'",/' -i calibration.json +pLoad=$(rt-app calibration.json 2>&1 |grep pLoad |sed 's/.*= $.*$ns.*/\1/') +sed 's/"calibration" : .*,/"calibration" : '$pLoad',/' -i dvfs.json +echo CPU$1\'s pLoad is $pLoad diff --git a/doc/examples/cpufreq_governor_efficiency/dvfs.json b/doc/examples/cpufreq_governor_efficiency/dvfs.json new file mode 100755 index 0000000..c8447df --- /dev/null +++ b/doc/examples/cpufreq_governor_efficiency/dvfs.json @@ -0,0 +1,27 @@ +{ + "tasks" : { + "thread" : { + "instance" : 1, + "cpus" : [0], +"loop" : 21, + "phases" : { + "running" : { + "loop" : 1, + "run" : 100, + }, + "sleeping" : { + "loop" : 1, + "sleep" : 100, + } + } + } + }, + "global" : { + "default_policy" : "SCHED_OTHER", + "calibration" : 90, + "lock_pages" : true, + "ftrace" : false, + "logdir" : "./", + } +} + diff --git a/doc/examples/cpufreq_governor_efficiency/dvfs.sh b/doc/examples/cpufreq_governor_efficiency/dvfs.sh new file mode 100755 index 0000000..00ce81d --- /dev/null +++ b/doc/examples/cpufreq_governor_efficiency/dvfs.sh @@ -0,0 +1,38 @@ +#!/bin/sh + +# $1 $2 $3 $4 $5: governor cpu run sleep loops +set -e + +echo $1 > /sys/devices/system/cpu/cpu$2/cpufreq/scaling_governor +#echo $1 > /sys/devices/system/cpu/cpu1/cpufreq/scaling_governor +sed 's/"cpus" : \[.*\],/"cpus" : \['$2'\],/' -i dvfs.json +sleep 3 + +if [ $3 ] ; then + sed 's/"run" : .*,/"run" : '$3',/' -i dvfs.json +fi + +if [ $4 ] ; then + sed 's/"sleep" : .*,/"sleep" : '$4',/' -i dvfs.json +fi + +if [ $5 ] ; then + sed 's/^"loop" : .*,/"loop" : '$5',/' -i dvfs.json +fi + +rt-app dvfs.json 2> /dev/null + +if [ $1 ] ; then + mv -f rt-app-thread-0.log rt-app_$1_run$3us_sleep$4us.log + + sum=0 + loop=0 + for i in $(cat rt-app_$1_run$3us_sleep$4us.log | sed 'n;d' | sed '1d' |cut -f 3); do + loop=$(expr $loop + 1) + 
sum=$(expr $sum + $i) + done + sum=$(expr $sum / $loop) + echo $sum + rm -f rt-app_$1_run$3us_sleep$4us.log +fi + diff --git a/doc/examples/cpufreq_governor_efficiency/test.sh b/doc/examples/cpufreq_governor_efficiency/test.sh new file mode 100755 index 0000000..0160952 --- /dev/null +++ b/doc/examples/cpufreq_governor_efficiency/test.sh @@ -0,0 +1,104 @@ +#!/bin/sh + +set -e + +test_efficiency() { + + FILENAME="results_$RANDOM$$.txt" + + if [ -e /sys/devices/system/cpu/cpu0/cpufreq/scaling_available_governors ]; then + for i in $(cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_available_governors); do + if [ $i = $1 ] ; then + gov_target=$i + fi + export gov_$i=$(echo $i) + done + else + echo "cpufreq sysfs is not available!" + exit + fi + + if [ ! $gov_target ] ; then + echo " Can't find $1 governor!" + exit + fi + + if [ ! $gov_performance ] ; then + echo "Can't find performance governor!" + exit + fi + + if [ ! $gov_powersave ] ; then + echo "Can't find powersave governor!" + exit + fi + + if [ $gov_target = $gov_performance ] || [ $gov_target = $gov_powersave ] ; then + echo "Please input a governor other than \"performance\" or \"powersave\"" + exit + fi + + # Get target gov data first + dvfs.sh $1 $2 $3 $4 $5 > $FILENAME + target=$(cat $FILENAME |sed -n '1p') + + # Get powersave data + dvfs.sh powersave $2 $3 $4 $5 > $FILENAME + powersave=$(cat $FILENAME |sed -n '1p') + + # Get performance data + dvfs.sh performance $2 $3 $4 $5 > $FILENAME + performance=$(cat $FILENAME |sed -n '1p') + + if [ $performance -ge $powersave ] ; then + echo "powersave: $powersave" + echo "performance: $performance" + echo "Error! performance spent more time than powersave!" 
+ exit + fi + + echo "\"powersave\" efficiency: 0%" + echo "\"performance\" efficiency: 100%" + + denominator=$(expr $powersave - $performance) + + if [ $powersave -le $target ]; then + target=0 + else + numerator=$(expr $powersave - $target) + numerator=$(expr $numerator \* 100) + target=$(expr $numerator / $denominator) + if [ $target -gt 100 ]; then + target=100 + fi + fi + + echo "\"$gov_target\" efficiency: $target%" + + rm -f $FILENAME +} + +if [ $# -lt 4 ]; then + echo "Usage: ./test.sh <governor> <cpu> <runtime> <sleeptime> [<loops>]" + echo "governor: target CPUFreq governor you want to test" + echo "cpu: cpu number on which you want to run the test" + echo "runtime: running time in ms per loop of the workload pattern" + echo "sleeptime: sleeping time in ms per loop of the workload pattern" + echo "loops: repeat times of the workload pattern. default: 10" + echo "\nExample:\n\"./test.sh ondemand 0 100 100 20\" means\nTest \"ondemand\" on CPU0 with workload pattern \"run 100ms + sleep 100ms\"(20 loops).\n" + exit +fi + +if [ $# = 4 ]; then + loops=10 +else + loops=$5 +fi + +echo "Test \"$1\" on CPU$2 with workload pattern \"run $3ms + sleep $4ms\"($loops loops)." + +sleep 1 +PATH=$PATH:. + +test_efficiency $1 $2 $(expr $3 \* 1000) $(expr $4 \* 1000) $loops + -- 1.9.1

10 years, 6 months

[RESEND PATCH v3] idlestat: Support energy modeling of idle consumption at different OPPs

by Xunlei Pang

From: Xunlei Pang <pang.xunlei(a)linaro.org> BACKGROUND Overview: Currently idlestat only handles an energy model with a single value for energy savings when going to WFI. IOW, idlestat does not know at which OPP (frequency, voltage) this WFI energy saving is calculated. Add functionality to allow a table of OPP and energy saving values to be listed so that the savings at various frequencies can be modeled correctly. As an example, a platform may have different power consumption when idled at different OPPs, as follows: @1989 MHz: single core in WFI consumes about 120 mW @1001 MHz: single core in WFI consumes about 45 mW @ 507 MHz: single core in WFI consumes about 30 mW This data is useful to optimise a platform's idle policy. MODIFICATION METHOD Now we want to modify the format of idlestat's energy model to accommodate the different C-states energy data at different OPPs. Let's take a simple example to illustrate the approach: Assuming different C-States energy data below: 1200Mhz: WFI 80 10 1200Mhz: cluster-sleep-b 30 5 1000Mhz: WFI 70 0 1000Mhz: cluster-sleep-b 25 0 Before modification: clusterA: 2 cap states 2 C states P-states: 1200 15200 6997 1000 8446 3846 C-states: WFI 70 0 cluster-sleep-b 25 0 After modification: clusterA: 2 cap states 2 C states P-state C-states pair 1: 1200 15200 6997 WFI 80 10 cluster-sleep-b 30 5 P-state C-states pair 2: 1000 8446 3846 WFI 70 0 cluster-sleep-b 25 0 In this way, we ensure P-states and C-States always appear in pairs. 
Signed-off-by: Xunlei Pang <pang.xunlei(a)linaro.org> --- energy_model.c | 101 ++++++++++++++++++++++----------------------------- idlestat.c | 63 +++++++++++++++++++++++++++++++- idlestat.h | 10 +++++ tracefile_idlestat.c | 4 ++ 4 files changed, 119 insertions(+), 59 deletions(-) diff --git a/energy_model.c b/energy_model.c index 2ea6fdd..8beb6a8 100644 --- a/energy_model.c +++ b/energy_model.c @@ -75,41 +75,38 @@ static int make_energy_model_template(struct program_options *options) list_for_each_entry(s_phy, &cpu_topo->physical_head, list_physical) { unsigned int num_cap_states = 0; unsigned int num_c_states = 0; - int i; + int i, j, pair; s_core = list_entry((&s_phy->core_head)->prev, struct cpu_core, list_core); s_cpu = list_entry((&s_core->cpu_head)->prev, struct cpu_cpu, list_cpu); num_c_states = s_cpu->cstates->cstate_max + 1; for (i = 0; i < s_cpu->pstates->max; i++) { struct cpufreq_pstate *p = &s_cpu->pstates->pstate[i]; - if (p->freq == 0) continue; num_cap_states++; } - fprintf(f, "\nC-states:\n"); - fprintf(f, "cluster%c: %d cap states %d C states\n\n", cluster_number + 'A', num_cap_states, num_c_states); - fprintf(f, "P-states:\n"); - fprintf(f, "# speed, cluster power, core power\n"); + pair = 0; for (i = 0; i < s_cpu->pstates->max; i++) { struct cpufreq_pstate *p = &s_cpu->pstates->pstate[i]; if (p->freq == 0) continue; + fprintf(f, "P-state C-states pair %d:\n", ++pair); + fprintf(f, "# P-state: speed, cluster power, core power\n"); fprintf(f, "%d\t\t?\t?\n", p->freq/1000); - } - fprintf(f, "\nC-states:\n"); - fprintf(f, "# name, cluster power, core power\n"); - for (i = 0; i < s_cpu->cstates->cstate_max + 1; i++) { - struct cpuidle_cstate *c = &s_cpu->cstates->cstate[i]; + fprintf(f, "# C-states: name, cluster power, core power\n"); + for (j = 0; j < s_cpu->cstates->cstate_max + 1; j++) { + struct cpuidle_cstate *c = &s_cpu->cstates->cstate[j]; - fprintf(f, "%s\t\t?\t?\n", c->name); + fprintf(f, "%s\t\t?\t?\n", c->name); + } } fprintf(f, "\n"); 
cluster_number++; @@ -130,7 +127,7 @@ int parse_energy_model(struct program_options *options) struct cluster_energy_info *clustp = NULL; unsigned int number_cap_states, number_c_states; int current_cluster = -1; - unsigned int current_pstate = 0; + unsigned int current_pstate = 0, speed; unsigned int current_cstate = 0; unsigned int clust_p, core_p; char buffer[BUFSIZE]; @@ -196,12 +193,13 @@ int parse_energy_model(struct program_options *options) clustp->number_c_states = number_c_states; clustp->p_energy = calloc(number_cap_states, sizeof(struct pstate_energy_info)); - clustp->c_energy = calloc(number_c_states, + clustp->c_energy = calloc(number_c_states * number_cap_states, sizeof(struct cstate_energy_info)); clustp->state = parsed_cluster_info; + current_pstate = 0; continue; } - if (strstr(buffer, "P-states")) { + if (strstr(buffer, "P-state C-states pair")) { if (current_cluster == -1) { fprintf(stderr, "%s: unknown cluster (cap states) in %s\n", __func__, path); @@ -214,27 +212,9 @@ int parse_energy_model(struct program_options *options) fclose(f); return -1; } - current_pstate = 0; clustp->state = parsing_cap_states; continue; } - if (strstr(buffer, "C-states")) { - if (current_cluster == -1) { - fprintf(stderr, "%s: unknown cluster (c states) in %s\n", - __func__, path); - fclose(f); - return -1; - } - if (clustp->state < parsed_cluster_info) { - fprintf(stderr, "%s: number of c states for cluster%c not specified in %s\n", - __func__, current_cluster, path); - fclose(f); - return -1; - } - current_cstate = 0; - clustp->state = parsing_c_states; - continue; - } if (strstr(buffer, "wakeup")) { unsigned int clust_w, core_w; @@ -254,10 +234,9 @@ int parse_energy_model(struct program_options *options) __func__, path); fclose(f); return -1; - } + } if (clustp->state == parsing_cap_states) { struct pstate_energy_info *pp; - unsigned int speed; if (sscanf(buffer, "%d %d %d", &speed, &clust_p, &core_p) != 3) { fprintf(stderr, "%s: expected P state (speed cluster 
core) for cluster%c in %s\n", @@ -276,6 +255,8 @@ int parse_energy_model(struct program_options *options) pp->speed = speed; pp->cluster_power = clust_p; pp->core_power = core_p; + clustp->state = parsing_c_states; + current_cstate = 0; continue; } if (clustp->state == parsing_c_states) { @@ -295,8 +276,9 @@ int parse_energy_model(struct program_options *options) fclose(f); return -1; } - cp = &clustp->c_energy[current_cstate++]; + cp = &clustp->c_energy[(current_pstate - 1) * clustp->number_c_states + current_cstate++]; strncpy(cp->cstate_name, name, NAMELEN); + cp->speed = speed; cp->cluster_idle_power = clust_p; cp->core_idle_power = core_p; continue; @@ -309,7 +291,7 @@ int parse_energy_model(struct program_options *options) return 0; } -static struct cstate_energy_info *find_cstate_energy_info(const unsigned int cluster, const char *name) +static struct cstate_energy_info *find_cstate_energy_info(const unsigned int cluster, unsigned int speed, const char *name) { struct cluster_energy_info *clustp; struct cstate_energy_info *cp; @@ -317,8 +299,8 @@ static struct cstate_energy_info *find_cstate_energy_info(const unsigned int clu clustp = cluster_energy_table + cluster; cp = &clustp->c_energy[0]; - for (i = 0; i < clustp->number_c_states; i++, cp++) { - if (!strcmp(cp->cstate_name, name)) return cp; + for (i = 0; i < clustp->number_c_states * clustp->number_cap_states; i++, cp++) { + if (cp->speed == speed && !strcmp(cp->cstate_name, name)) return cp; } return NULL; } @@ -377,6 +359,7 @@ void calculate_energy_consumption(struct cpu_topology *cpu_topo) for (j = 0; j < s_phy->cstates->cstate_max + 1; j++) { struct cpuidle_cstate *c = &s_phy->cstates->cstate[j]; + struct duration_opp *opps = c->duration_opps; if (c->nrdata == 0) { verbose_fprintf(stderr, 2, @@ -385,19 +368,20 @@ void calculate_energy_consumption(struct cpu_topology *cpu_topo) continue; } - cp = find_cstate_energy_info(current_cluster, c->name); - if (!cp) { - verbose_fprintf(stderr, 2, " C%-2d no 
energy model for [%s] (%d hits, %f duration)\n", - j, c->name, c->nrdata, c->duration); - continue; - } + for (i = 0; i < c->nropp; i++) { + cp = find_cstate_energy_info(current_cluster, opps[i].speed/1000, c->name); + if (!cp) { + verbose_fprintf(stderr, 2, " C%-2d no energy model for [%s] (%d hits, %f duration) %d speed\n", + j, c->name, c->nrdata, opps[i].duration, opps[i].speed); + continue; + } - cluster_idl += c->duration * cp->cluster_idle_power; + cluster_idl += opps[i].duration * cp->cluster_idle_power; + } - verbose_fprintf(stderr, 1, " C%-2d +%7d hits for [%15s] | %13.0f | %7d | %7s | %12s | %12.0f | %12s |\n", + verbose_fprintf(stderr, 1, " C%-2d +%7d hits for [%15s] | %13.0f | %7s | %12s | %12.0f | %12s |\n", j, c->nrdata, c->name, c->duration, - cp->cluster_idle_power, "", "", cluster_idl, ""); @@ -442,25 +426,28 @@ void calculate_energy_consumption(struct cpu_topology *cpu_topo) for (i = 0; i < s_cpu->cstates->cstate_max + 1; i++) { struct cpuidle_cstate *c = &s_cpu->cstates->cstate[i]; + struct duration_opp *opps = c->duration_opps; + if (c->nrdata == 0) { verbose_fprintf(stderr, 2, "Cpu%d C%-2d no hits for [%15s] | 0 | 0 | | | 0 | |\n", s_cpu->cpu_id, i, c->name); continue; } - cp = find_cstate_energy_info(current_cluster, c->name); - if (!cp) { - verbose_fprintf(stderr, 2, "Cpu%d C%-2d no energy model for [%s] (%d hits, %f duration)\n", - s_cpu->cpu_id, i, c->name, - c->nrdata, c->duration); - continue; + for (j = 0; j < c->nropp; j++) { + cp = find_cstate_energy_info(current_cluster, opps[j].speed/100, c->name); + if (!cp) { + verbose_fprintf(stderr, 2, "Cpu%d C%-2d no energy model for [%s] (%d hits, %f duration, %d speed)\n", + s_cpu->cpu_id, i, c->name, + c->nrdata, opps[j].duration, opps[j].speed); + continue; + } + cluster_idl += opps[j].duration * cp->core_idle_power; } - cluster_idl += c->duration * cp->core_idle_power; - verbose_fprintf(stderr, 1, "Cpu%d C%-2d +%7d hits for [%15s] | %13.0f | %7d | %7s | %12s | %12.0f | %12s |\n", + 
verbose_fprintf(stderr, 1, "Cpu%d C%-2d +%7d hits for [%15s] | %13.0f | %7s | %12s | %12.0f | %12s |\n", s_cpu->cpu_id, i, c->nrdata, c->name, c->duration, - cp->core_idle_power, "", "", cluster_idl, ""); diff --git a/idlestat.c b/idlestat.c index 4d773f4..687b484 100644 --- a/idlestat.c +++ b/idlestat.c @@ -338,6 +338,8 @@ struct cpuidle_cstates *build_cstate_info(int nrcpus) c->duration = 0.; c->target_residency = cpuidle_get_target_residency(cpu, i); + c->duration_opps = NULL; + c->nropp = 0; } } return cstates; @@ -739,6 +741,7 @@ static int cstate_begin(double time, int state, struct cpuidle_cstates *cstates) memset(data + nrdata, 0, sizeof(*data)); data[nrdata].begin = time; + data[nrdata].speed = cstates->current_speed; cstate->data = data; cstates->cstate_max = MAX(cstates->cstate_max, state); @@ -747,7 +750,34 @@ static int cstate_begin(double time, int state, struct cpuidle_cstates *cstates) return 0; } -static void cstate_end(double time, struct cpuidle_cstates *cstates) +static int update_duration_opp(struct cpuidle_cstate *cstate, unsigned int freq, double duration) +{ + struct duration_opp *opps = cstate->duration_opps; + int i; + + for (i = 0; i < cstate->nropp; i++) { + if (opps[i].speed == freq) { + opps[i].duration += duration; + return 0; + } + } + + opps = realloc(cstate->duration_opps, sizeof(*opps) * (cstate->nropp + 1)); + if (!opps) { + free(cstate->duration_opps); + return error(__func__); + } + + cstate->duration_opps = opps; + memset(opps + cstate->nropp, 0, sizeof(*opps)); + opps[cstate->nropp].speed = freq; + opps[cstate->nropp].duration += duration; + cstate->nropp++; + + return 0; +} + +static int cstate_end(double time, struct cpuidle_cstates *cstates) { int last_cstate = cstates->current_cstate; struct cpuidle_cstate *cstate = &cstates->cstate[last_cstate]; @@ -792,12 +822,17 @@ static void cstate_end(double time, struct cpuidle_cstates *cstates) cstate->max_time = MAX(cstate->max_time, data->duration); cstate->avg_time = 
AVG(cstate->avg_time, data->duration, cstate->nrdata + 1); + + if (update_duration_opp(cstate, data->speed, data->duration) < 0) + return error(__func__); + cstate->duration += data->duration; cstate->nrdata++; skip_entry: /* CPU is no longer idle */ cstates->current_cstate = -1; + return 0; } int record_cstate_event(struct cpuidle_cstates *cstates, @@ -810,13 +845,35 @@ int record_cstate_event(struct cpuidle_cstates *cstates, return 0; if (cstates->current_cstate != -1) - cstate_end(time, cstates); + ret = cstate_end(time, cstates); if (state != -1) ret = cstate_begin(time, state, cstates); return ret; } +static void core_update_current_speed(struct cpu_core *core) +{ + struct cpu_cpu *cpu; + unsigned int max_speed = 0; + + core_for_each_cpu(cpu, core) + max_speed = MAX(max_speed, cpu->cstates->current_speed); + + core->cstates->current_speed = max_speed; +} + +static void cluster_update_current_speed(struct cpu_physical *clust) +{ + struct cpu_cpu *cpu; + unsigned int max_speed = 0; + + cluster_for_each_cpu(cpu, clust) + max_speed = MAX(max_speed, cpu->cstates->current_speed); + + clust->cstates->current_speed = max_speed; +} + int store_data(double time, int state, int cpu, struct cpuidle_datas *datas) { @@ -843,11 +900,13 @@ int store_data(double time, int state, int cpu, /* Update core and cluster */ aff_core = cpu_to_core(cpu, datas->topo); state = core_get_least_cstate(aff_core); + core_update_current_speed(aff_core); if (record_cstate_event(aff_core->cstates, time, state) == -1) return -1; aff_cluster = cpu_to_cluster(cpu, datas->topo); state = cluster_get_least_cstate(aff_cluster); + cluster_update_current_speed(aff_cluster); if (record_cstate_event(aff_cluster->cstates, time,state) == -1) return -1; diff --git a/idlestat.h b/idlestat.h index e030f6a..2eef6c9 100644 --- a/idlestat.h +++ b/idlestat.h @@ -48,11 +48,17 @@ "/sys/devices/system/cpu/cpu%d/cpufreq/cpuinfo_cur_freq" struct cpuidle_data { + unsigned int speed; double begin; double end; double 
duration; }; +struct duration_opp { + unsigned int speed; + double duration; +}; + struct cpuidle_cstate { char *name; struct cpuidle_data *data; @@ -62,7 +68,9 @@ struct cpuidle_cstate { double avg_time; double max_time; double min_time; + struct duration_opp *duration_opps; double duration; + int nropp; int target_residency; /* -1 if not available */ }; @@ -83,6 +91,7 @@ struct cpuidle_cstates { struct cpuidle_cstate cstate[MAXCSTATE]; struct wakeup_info wakeinfo; int current_cstate; + unsigned int current_speed; int cstate_max; struct wakeup_irq *wakeirq; enum {as_expected, too_long, too_short} actual_residency; @@ -157,6 +166,7 @@ struct pstate_energy_info { struct cstate_energy_info { char cstate_name[NAMELEN]; + unsigned int speed; unsigned int cluster_idle_power; unsigned int core_idle_power; double cluster_duration; diff --git a/tracefile_idlestat.c b/tracefile_idlestat.c index 3430693..79f8578 100644 --- a/tracefile_idlestat.c +++ b/tracefile_idlestat.c @@ -108,6 +108,8 @@ static struct cpuidle_cstates *load_and_build_cstate_info(FILE* f, char *buffer, c->max_time = 0.; c->min_time = DBL_MAX; c->duration = 0.; + c->duration_opps = NULL; + c->nropp = 0; c->target_residency = residency; } fgets(buffer, BUFSIZE, f); @@ -146,6 +148,8 @@ int load_text_data_line(char *buffer, struct cpuidle_datas *datas, char *format, "be wrong.\n"); return -1; } + + datas->cstates[cpu].current_speed = freq; return cpu_change_pstate(datas, cpu, freq, time); } -- 1.9.1

10 years, 6 months

[PATCH v3] workgen: Add IO-bounded and memory-bounded events.

by Pi-Cheng Chen

From: "pi-cheng.chen" <pi-cheng.chen(a)linaro.org> Add 2 new kinds of events for running a memory-bounded or an IO-bounded load. A "mem" event creates a memory-bounded load, and an "iorun" event creates an IO-bounded load. The default file written to create the load is /dev/null; the device/file can be specified with the "io_device" key in the "global" section. The size of the per-thread memory buffer used to create the IO/memory load can also be specified with "mem_buffer_size" in the same section. E.g. "tasks" : { "thread0" : { "sleep" : 1000, "run" : 100, "mem" : 1000, "sleep" : 10000, "iorun" : 1000 } }, "global" : { "io_device" : "/dev/ttyS0", "mem_buffer_size" : 1048576 } Signed-off-by: pi-cheng.chen <pi-cheng.chen(a)linaro.org> --- changes from v2: - allocate / open resources at parsing stage changes from v1: - Allocate per-thread memory buffer - Add "mem_buffer_size" in global section to specify the size of buffer - fix possible buffer overrun when creating IO-bounded load --- src/rt-app.c | 48 +++++++++++++++++++++++++++++++++++++++ src/rt-app_parse_config.c | 58 +++++++++++++++++++++++++++++++++++++++++++++++ src/rt-app_types.h | 16 +++++++++++++ 3 files changed, 122 insertions(+) diff --git a/src/rt-app.c b/src/rt-app.c index 37e9892..4e78844 100644 --- a/src/rt-app.c +++ b/src/rt-app.c @@ -179,6 +179,42 @@ static inline loadwait(unsigned long exec) return load_count; } +static void ioload(unsigned long count, struct _rtapp_iomem_buf *iomem, int io_fd) +{ + ssize_t ret; + + while (count != 0) { + unsigned long size; + + if (count > iomem->size) + size = iomem->size; + else + size = count; + + ret = write(io_fd, iomem->ptr, size); + if (ret == -1) { + perror("write"); + return; + } + count -= ret; + } +} + +static void memload(unsigned long count, struct _rtapp_iomem_buf *iomem) +{ + while (count > 0) { + unsigned long size; + + if (count > iomem->size) + size = iomem->size; + else + size = count; + + memset(iomem->ptr, 0, size); + count -= size; + } +} + static int 
run_event(event_data_t *event, int dry_run, unsigned long *perf, unsigned long *duration, rtapp_resource_t *resources) { @@ -271,6 +307,18 @@ static int run_event(event_data_t *event, int dry_run, pthread_mutex_unlock(&(ddata->res.mtx.obj)); break; } + case rtapp_mem: + { + log_debug("mem %d", event->count); + memload(event->count, &rdata->res.buf); + } + break; + case rtapp_iorun: + { + log_debug("iorun %d", event->count); + ioload(event->count, &rdata->res.buf, ddata->res.dev.fd); + } + break; } return lock; diff --git a/src/rt-app_parse_config.c b/src/rt-app_parse_config.c index 96e5517..def3070 100644 --- a/src/rt-app_parse_config.c +++ b/src/rt-app_parse_config.c @@ -27,6 +27,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. #define PIN2 PIN" " #define PIN3 PIN2" " #define JSON_FILE_BUF_SIZE 4096 +#define DEFAULT_MEM_BUF_SIZE (4 * 1024 * 1024) /* redefine foreach as in <json/json_object.h> but to be ANSI * compatible */ @@ -191,6 +192,21 @@ static int init_cond_resource(rtapp_resource_t *data, const rtapp_options_t *opt &data->res.cond.attr); } +static int init_membuf_resource(rtapp_resource_t *data, const rtapp_options_t *opts) +{ + log_info(PIN3 "Init: %s membuf", data->name); + + data->res.buf.ptr = malloc(opts->mem_buffer_size); + data->res.buf.size = opts->mem_buffer_size; +} + +static int init_iodev_resource(rtapp_resource_t *data, const rtapp_options_t *opts) +{ + log_info(PIN3 "Init: %s io device", data->name); + + data->res.dev.fd = open(opts->io_device, O_CREAT | O_WRONLY, 0644); +} + static void init_resource_data(const char *name, int type, int idx, const rtapp_options_t *opts) { @@ -211,6 +227,12 @@ init_resource_data(const char *name, int type, int idx, const rtapp_options_t *o case rtapp_wait: init_cond_resource(data, opts); break; + case rtapp_mem: + init_membuf_resource(data, opts); + break; + case rtapp_iorun: + init_iodev_resource(data, opts); + break; } } @@ -332,6 +354,32 @@ parse_thread_event_data(char 
*name, struct json_object *obj, return; } + if (!strncmp(name, "mem", strlen("mem")) || + !strncmp(name, "iorun", strlen("iorun"))) { + if (!json_object_is_type(obj, json_type_int)) + goto unknown_event; + + /* create an unique name for per-thread buffer */ + ref = create_unique_name(unique_name, sizeof(unique_name), "mem", tag); + i = get_resource_index(ref, rtapp_mem, opts); + data->res = i; + data->count = json_object_get_int(obj); + + /* A single IO devices for all threads */ + if (strncmp(name, "iorun", strlen("iorun")) == 0) { + i = get_resource_index("io_device", rtapp_iorun, opts); + data->dep = i; + }; + + if (!strncmp(name, "mem", strlen("mem"))) + data->type = rtapp_mem; + else + data->type = rtapp_iorun; + + log_info(PIN2 "type %d count %d", data->type, data->count); + return; + } + if (!strncmp(name, "lock", strlen("lock")) || !strncmp(name, "unlock", strlen("unlock"))) { @@ -526,6 +574,10 @@ obj_is_event(char *name) return rtapp_suspend; if (!strncmp(name, "resume", strlen("resume"))) return rtapp_resume; + if (!strncmp(name, "mem", strlen("mem"))) + return rtapp_mem; + if (!strncmp(name, "iorun", strlen("iorun"))) + return rtapp_iorun; return 0; } @@ -710,6 +762,8 @@ parse_global(struct json_object *global, rtapp_options_t *opts) opts->logbasename = strdup("rt-app"); opts->ftrace = 0; opts->pi_enabled = 0; + opts->io_device = strdup("/dev/null"); + opts->mem_buffer_size = DEFAULT_MEM_BUF_SIZE; return; } @@ -762,6 +816,10 @@ parse_global(struct json_object *global, rtapp_options_t *opts) TRUE, "rt-app"); opts->ftrace = get_bool_value_from(global, "ftrace", TRUE, 0); opts->pi_enabled = get_bool_value_from(global, "pi_enabled", TRUE, 0); + opts->io_device = get_string_value_from(global, "io_device", TRUE, + "/dev/null"); + opts->mem_buffer_size = get_int_value_from(global, "mem_buffer_size", + TRUE, DEFAULT_MEM_BUF_SIZE); } diff --git a/src/rt-app_types.h b/src/rt-app_types.h index 1075f64..aab96a7 100644 --- a/src/rt-app_types.h +++ 
b/src/rt-app_types.h @@ -65,6 +65,8 @@ typedef enum resource_t rtapp_timer, rtapp_suspend, rtapp_resume, + rtapp_mem, + rtapp_iorun, } resource_t; struct _rtapp_mutex { @@ -86,6 +88,15 @@ struct _rtapp_timer { int init; }; +struct _rtapp_iomem_buf { + char *ptr; + int size; +}; + +struct _rtapp_iodev { + int fd; +}; + /* Shared resources */ typedef struct _rtapp_resource_t { union { @@ -93,6 +104,8 @@ typedef struct _rtapp_resource_t { struct _rtapp_cond cond; struct _rtapp_signal signal; struct _rtapp_timer timer; + struct _rtapp_iomem_buf buf; + struct _rtapp_iodev dev; } res; int index; resource_t type; @@ -104,6 +117,7 @@ typedef struct _event_data_t { int res; int dep; int duration; + int count; } event_data_t; typedef struct _phase_data_t { @@ -166,6 +180,8 @@ typedef struct _rtapp_options_t { int ftrace; int die_on_dmiss; + int mem_buffer_size; + char *io_device; } rtapp_options_t; typedef struct _timing_point_t { -- 1.9.1

10 years, 6 months

[PATCH 00/11] rt-app: add new features and update example JSON file

by Pi-Cheng Chen

This series adds several new features to rt-app (signal, wait, broadcast, run, sleep, etc.) and updates the example taskset.json file to reflect those new features. These patches can also be found at: ssh://git@git.linaro.org/people/picheng.chen/rt-app.git features_series1 Sanjay Singh Rawat (1): add delay param in thread parameters Vincent Guittot (10): Add new wait, signal, and broadcast resources update the .json test file to reflect new capabilities add loop feature rt-app: make load frequency independent. update example file with new loop capabilities add sleep and run type of resources update taskset.json example with run and sleep capabilities make possible to remove the sleep step modify stat to reflect new behavior update example file with more capabilities doc/taskset.json | 104 ++++++++++++---------- src/rt-app.c | 155 +++++++++++++++++++++++++++------ src/rt-app_parse_config.c | 214 +++++++++++++++++++++++++++++++++++++--------- src/rt-app_types.h | 35 +++++++- src/rt-app_utils.c | 64 ++++++++++++++ src/rt-app_utils.h | 9 ++ 6 files changed, 467 insertions(+), 114 deletions(-) -- 1.9.1

10 years, 7 months

[PATCH] Idlestat: set reasonable size for trace buffer

by Leo Yan

If the -S option for the buffer size is not set, idlestat sizes the ftrace buffer with the formula below: percpu_buffer_size = ((2 * TRACE_IDLE_LENGTH * TRACE_IDLE_NRHITS_PER_SEC) + (TRACE_CPUFREQ_LENGTH * TRACE_CPUFREQ_NRHITS_PER_SEC)) * duration; According to the current macro definitions, to run a test case for 40s idlestat needs the following trace buffer for one CPU: (2*196*10000 + 196*100) Bytes/second x 40 seconds = 157584000 Bytes; So it needs to allocate 150MB for one CPU; if the system has 8 CPUs, it needs to allocate 150MB * 8 = 1.2GB in total. The kernel then reports a failure if the system cannot allocate that much buffer space: [ 42.562531] idlestat: page allocation failure: order:0, mode:0x10d0 [ 42.568817] CPU: 2 PID: 819 Comm: idlestat Not tainted 4.2.0-rc4+ #106 [ 42.575372] Hardware name: HiKey Development Board (DT) [ 42.580610] Call trace: [ 42.583067] [<ffffffc00008a624>] dump_backtrace+0x0/0x164 [ 42.588465] [<ffffffc00008a7a4>] show_stack+0x1c/0x28 [ 42.593535] [<ffffffc0006d3b4c>] dump_stack+0x80/0xc4 [ 42.598589] [<ffffffc00018a064>] warn_alloc_failed+0xe4/0x13c [ 42.604355] [<ffffffc00018dac0>] __alloc_pages_nodemask+0x6a8/0x814 [ 42.610636] [<ffffffc000152774>] __rb_allocate_pages.isra.55+0x60/0x164 [ 42.617251] [<ffffffc000154718>] ring_buffer_resize+0x250/0x428 [ 42.623192] [<ffffffc000158808>] __tracing_resize_ring_buffer+0x54/0x17c [ 42.629905] [<ffffffc0001589d8>] tracing_entries_write+0xa8/0x158 [ 42.636000] [<ffffffc0001f2fec>] __vfs_write+0x44/0x130 [ 42.641240] [<ffffffc0001f3a50>] vfs_write+0x98/0x1a0 [ 42.646291] [<ffffffc0001f4630>] SyS_write+0x50/0xb0 After reviewing the macro definitions: TRACE_IDLE_NRHITS_PER_SEC is 10000, but usually a CPU enters/exits idle states far fewer than 1000 times per second, so reduce it to 1000, which is enough for profiling. 
Signed-off-by: Leo Yan <leo.yan(a)linaro.org> --- trace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/trace.h b/trace.h index a2f5867..863c248 100644 --- a/trace.h +++ b/trace.h @@ -34,7 +34,7 @@ #define TRACE_FREE TRACE_PATH "/free_buffer" #define TRACE_FILE TRACE_PATH "/trace" #define TRACE_STAT_FILE TRACE_PATH "/per_cpu/cpu0/stats" -#define TRACE_IDLE_NRHITS_PER_SEC 10000 +#define TRACE_IDLE_NRHITS_PER_SEC 1000 #define TRACE_IDLE_LENGTH 196 #define TRACE_CPUFREQ_NRHITS_PER_SEC 100 #define TRACE_CPUFREQ_LENGTH 196 -- 1.9.1

10 years, 7 months

Re: [Sched-tools] Query: rt-app for EAS

by Amit Kucheria

Replacing the list address with one specific for scheduler tools. On Tue, Jul 14, 2015 at 12:27 PM, Leo Yan <leo.yan(a)linaro.org> wrote: > Hi all, > > I saw the energy model related patches have been posted [1], so i'd > like to ask some questions for the tool rt-app: > > - I downloaded rt-app from the address [2]; > > - For the case "rt-app [4]: mp3 playback use-case model", is it the > same one with doc/examples/mp3-long.json? > > - For the case "rt-app [4]: 5 ~[6,13,19,25,31,38,44,50]% periodic > (2ms) tasks for 30s", i have not seen there have .json scripts for > these cases; so could you help point where i can get related > scripts? > > - i read the case doc/examples/tutorial/example3.json, its comments > say "starts with a 10% load during 1sec (100 loops) and then > increase 90% for the next sec (100 loops)". > > it's confused me due i cannot calculate the same workload with > the comment clarified. For the phase "light", it only specifies the > flow as run 1ms + timer 30ms, so the workload only have 1/30 = > 3.33%; for the phase "heavy", it will get workload = 4/30 = 13.33%; > > Could you help confirm if i wrongly understand this case? > > { > /* > * Simple use case which starts with a 10% load during 1sec (100 loops) > * and then increase to 90% for the next sec (100 loops) > */ > "tasks" : { > "thread0" : { > "instance" : 12, > "loop" : 1, > "phases" : { > "light" : { > "loop" : 10, > "run" : 1000, > "timer" : { "ref" : "unique", "period" : 30000 } > }, > "heavy" : { > "loop" : 10, > "run" : 4000, > "timer" : { "ref" : "unique", "period" : 30000 } > } > } > } > }, > "global" : { > "logdir" : "./", > "log_basename" : "rt-app3", > "ftrace" : true, > "gnuplot" : true > } > } > > [1] http://thread.gmane.org/gmane.linux.power-management.general/62093 > [2] https://github.com/scheduler-tools/rt-app.git exp/eas_v5 > > Thanks, > Leo Yan

10 years, 7 months

← Newer
1
2
3
4
5
6
7
8
9
10
Older →

2026

2025

2024

2023

2022

2021

2020

2019

2018

2017

2016

2015

Sched-tools