From: Xunlei Pang pang.xunlei@linaro.org
BACKGROUND Overview: Currently idlestat only handles a energy model with a single value for energy savings when going to WFI. IOW, idlestat does not know what OPP (frequency, voltage) is this WFI energy savings calculated at.
Add funtionality to allow a table of OPP and energy saving values to be listed so that the savings at various frequencies can be modeled correctly.
As an example, a platform may have different power consumption when idled at different OPPs, as follow:
@1989 MHz: single core in WFI consumes about 120 mW @1001 MHz: single core in WFI consumes about 45 mW @ 507 MHz: single core in WFI consumes about 30 mW
This data is useful to optimise a platform's idle policy.
MODIFICATION METHOD Now we want to modify the format of idlestat's energy model to accomodate the different C-states energy data at different OPPs.
Let's take a simple example to explicate the way: Assuming different C-States energy data below: 1200Mhz: WFI 80 10 1200Mhz: cluster-sleep-b 30 5
1000Mhz: WFI 70 0 1000Mhz: cluster-sleep-b 25 0
Before modification: clusterA: 2 cap states 2 C states
P-states: 1200 15200 6997 1000 8446 3846
C-states: WFI 70 0 cluster-sleep-b 25 0
After modification: clusterA: 2 cap states 2 C states
P-state C-states pair 1: 1200 15200 6997 WFI 80 10 cluster-sleep-b 30 5
P-state C-states pair 2: 1000 8446 3846 WFI 70 0 cluster-sleep-b 25 0
In this way, we ensure P-states and C-States always appear in pairs.
Signed-off-by: Xunlei Pang pang.xunlei@linaro.org --- energy_model.c | 104 ++++++++++++++++++++++----------------------------- idlestat.c | 64 +++++++++++++++++++++++++++++-- idlestat.h | 10 +++++ trace_ops.h | 1 - tracefile_idlestat.c | 4 ++ 5 files changed, 120 insertions(+), 63 deletions(-)
diff --git a/energy_model.c b/energy_model.c index 2ea6fdd..3932c23 100644 --- a/energy_model.c +++ b/energy_model.c @@ -75,7 +75,7 @@ static int make_energy_model_template(struct program_options *options) list_for_each_entry(s_phy, &cpu_topo->physical_head, list_physical) { unsigned int num_cap_states = 0; unsigned int num_c_states = 0; - int i; + int i, j;
s_core = list_entry((&s_phy->core_head)->prev, struct cpu_core, list_core); s_cpu = list_entry((&s_core->cpu_head)->prev, struct cpu_cpu, list_cpu); @@ -89,27 +89,23 @@ static int make_energy_model_template(struct program_options *options) num_cap_states++; }
- fprintf(f, "\nC-states:\n"); - fprintf(f, "cluster%c: %d cap states %d C states\n\n", cluster_number + 'A', num_cap_states, num_c_states); - fprintf(f, "P-states:\n"); - fprintf(f, "# speed, cluster power, core power\n"); - for (i = 0; i < s_cpu->pstates->max; i++) { struct cpufreq_pstate *p = &s_cpu->pstates->pstate[i];
if (p->freq == 0) continue;
+ fprintf(f, "P-state C-states pair %d:\n", i + 1); + fprintf(f, "# P-state: speed, cluster power, core power\n"); fprintf(f, "%d\t\t?\t?\n", p->freq/1000); - } - fprintf(f, "\nC-states:\n"); - fprintf(f, "# name, cluster power, core power\n"); - for (i = 0; i < s_cpu->cstates->cstate_max + 1; i++) { - struct cpuidle_cstate *c = &s_cpu->cstates->cstate[i]; + fprintf(f, "# C-states: name, cluster power, core power\n"); + for (j = 0; j < s_cpu->cstates->cstate_max + 1; j++) { + struct cpuidle_cstate *c = &s_cpu->cstates->cstate[j];
- fprintf(f, "%s\t\t?\t?\n", c->name); + fprintf(f, "%s\t\t?\t?\n", c->name); + } } fprintf(f, "\n"); cluster_number++; @@ -130,8 +126,8 @@ int parse_energy_model(struct program_options *options) struct cluster_energy_info *clustp = NULL; unsigned int number_cap_states, number_c_states; int current_cluster = -1; - unsigned int current_pstate = 0; - unsigned int current_cstate = 0; + unsigned int current_pstate, speed; + unsigned int current_cstate; unsigned int clust_p, core_p; char buffer[BUFSIZE]; char *path = options->energy_model_filename; @@ -196,12 +192,13 @@ int parse_energy_model(struct program_options *options) clustp->number_c_states = number_c_states; clustp->p_energy = calloc(number_cap_states, sizeof(struct pstate_energy_info)); - clustp->c_energy = calloc(number_c_states, + clustp->c_energy = calloc(number_c_states*number_cap_states, sizeof(struct cstate_energy_info)); clustp->state = parsed_cluster_info; + current_pstate = 0; continue; } - if (strstr(buffer, "P-states")) { + if (strstr(buffer, "P-state C-states pair")) { if (current_cluster == -1) { fprintf(stderr, "%s: unknown cluster (cap states) in %s\n", __func__, path); @@ -214,27 +211,9 @@ int parse_energy_model(struct program_options *options) fclose(f); return -1; } - current_pstate = 0; clustp->state = parsing_cap_states; continue; } - if (strstr(buffer, "C-states")) { - if (current_cluster == -1) { - fprintf(stderr, "%s: unknown cluster (c states) in %s\n", - __func__, path); - fclose(f); - return -1; - } - if (clustp->state < parsed_cluster_info) { - fprintf(stderr, "%s: number of c states for cluster%c not specified in %s\n", - __func__, current_cluster, path); - fclose(f); - return -1; - } - current_cstate = 0; - clustp->state = parsing_c_states; - continue; - } if (strstr(buffer, "wakeup")) { unsigned int clust_w, core_w;
@@ -254,10 +233,9 @@ int parse_energy_model(struct program_options *options) __func__, path); fclose(f); return -1; - } + } if (clustp->state == parsing_cap_states) { struct pstate_energy_info *pp; - unsigned int speed;
if (sscanf(buffer, "%d %d %d", &speed, &clust_p, &core_p) != 3) { fprintf(stderr, "%s: expected P state (speed cluster core) for cluster%c in %s\n", @@ -276,6 +254,8 @@ int parse_energy_model(struct program_options *options) pp->speed = speed; pp->cluster_power = clust_p; pp->core_power = core_p; + clustp->state = parsing_c_states; + current_cstate = 0; continue; } if (clustp->state == parsing_c_states) { @@ -295,8 +275,9 @@ int parse_energy_model(struct program_options *options) fclose(f); return -1; } - cp = &clustp->c_energy[current_cstate++]; + cp = &clustp->c_energy[(current_pstate - 1) * clustp->number_c_states + current_cstate++]; strncpy(cp->cstate_name, name, NAMELEN); + cp->speed = speed; cp->cluster_idle_power = clust_p; cp->core_idle_power = core_p; continue; @@ -309,7 +290,7 @@ int parse_energy_model(struct program_options *options) return 0; }
-static struct cstate_energy_info *find_cstate_energy_info(const unsigned int cluster, const char *name) +static struct cstate_energy_info *find_cstate_energy_info(const unsigned int cluster, unsigned int speed, const char *name) { struct cluster_energy_info *clustp; struct cstate_energy_info *cp; @@ -317,8 +298,8 @@ static struct cstate_energy_info *find_cstate_energy_info(const unsigned int clu
clustp = cluster_energy_table + cluster; cp = &clustp->c_energy[0]; - for (i = 0; i < clustp->number_c_states; i++, cp++) { - if (!strcmp(cp->cstate_name, name)) return cp; + for (i = 0; i < clustp->number_c_states * clustp->number_cap_states; i++, cp++) { + if (cp->speed == speed && !strcmp(cp->cstate_name, name)) return cp; } return NULL; } @@ -377,6 +358,7 @@ void calculate_energy_consumption(struct cpu_topology *cpu_topo)
for (j = 0; j < s_phy->cstates->cstate_max + 1; j++) { struct cpuidle_cstate *c = &s_phy->cstates->cstate[j]; + struct duration_opp *opps = c->duration_opps;
if (c->nrdata == 0) { verbose_fprintf(stderr, 2, @@ -385,19 +367,20 @@ void calculate_energy_consumption(struct cpu_topology *cpu_topo) continue; }
- cp = find_cstate_energy_info(current_cluster, c->name); - if (!cp) { - verbose_fprintf(stderr, 2, " C%-2d no energy model for [%s] (%d hits, %f duration)\n", - j, c->name, c->nrdata, c->duration); - continue; - } + for (i = 0; i < c->nropp; i++) { + cp = find_cstate_energy_info(current_cluster, opps[i].speed/1000, c->name); + if (!cp) { + verbose_fprintf(stderr, 2, " C%-2d no energy model for [%s] (%d hits, %f duration) %d speed\n", + j, c->name, c->nrdata, opps[i].duration, opps[i].speed); + continue; + }
- cluster_idl += c->duration * cp->cluster_idle_power; + cluster_idl += opps[i].duration * cp->cluster_idle_power; + }
- verbose_fprintf(stderr, 1, " C%-2d +%7d hits for [%15s] | %13.0f | %7d | %7s | %12s | %12.0f | %12s |\n", + verbose_fprintf(stderr, 1, " C%-2d +%7d hits for [%15s] | %13.0f | %7s | %12s | %12.0f | %12s |\n", j, c->nrdata, c->name, c->duration, - cp->cluster_idle_power, "", "", cluster_idl, ""); @@ -415,7 +398,7 @@ void calculate_energy_consumption(struct cpu_topology *cpu_topo)
pp = find_pstate_energy_info(current_cluster, p->freq/1000); if (!pp) { - verbose_fprintf(stderr, 2, "Cluster %c frequency %u MHz no energy model for [%d] (%d hits, %f duration)\n", + verbose_fprintf(stderr, 1, "Cluster %c frequency %u MHz no energy model for [%d] (%d hits, %f duration)\n", s_phy->physical_id + 'A', p->freq/1000, p->count, p->duration); continue; @@ -442,25 +425,28 @@ void calculate_energy_consumption(struct cpu_topology *cpu_topo)
for (i = 0; i < s_cpu->cstates->cstate_max + 1; i++) { struct cpuidle_cstate *c = &s_cpu->cstates->cstate[i]; + struct duration_opp *opps = c->duration_opps; + if (c->nrdata == 0) { verbose_fprintf(stderr, 2, "Cpu%d C%-2d no hits for [%15s] | 0 | 0 | | | 0 | |\n", s_cpu->cpu_id, i, c->name); continue; } - cp = find_cstate_energy_info(current_cluster, c->name); - if (!cp) { - verbose_fprintf(stderr, 2, "Cpu%d C%-2d no energy model for [%s] (%d hits, %f duration)\n", - s_cpu->cpu_id, i, c->name, - c->nrdata, c->duration); - continue; + for (j = 0; j < c->nropp; j++) { + cp = find_cstate_energy_info(current_cluster, opps[j].speed/100, c->name); + if (!cp) { + verbose_fprintf(stderr, 2, "Cpu%d C%-2d no energy model for [%s] (%d hits, %f duration, %d speed)\n", + s_cpu->cpu_id, i, c->name, + c->nrdata, opps[j].duration, opps[j].speed); + continue; + } + cluster_idl += opps[j].duration * cp->core_idle_power; } - cluster_idl += c->duration * cp->core_idle_power;
- verbose_fprintf(stderr, 1, "Cpu%d C%-2d +%7d hits for [%15s] | %13.0f | %7d | %7s | %12s | %12.0f | %12s |\n", + verbose_fprintf(stderr, 2, "Cpu%d C%-2d +%7d hits for [%15s] | %13.0f | %7s | %12s | %12.0f | %12s |\n", s_cpu->cpu_id, i, c->nrdata, c->name, c->duration, - cp->core_idle_power, "", "", cluster_idl, ""); diff --git a/idlestat.c b/idlestat.c index 4d773f4..75ac1fc 100644 --- a/idlestat.c +++ b/idlestat.c @@ -335,9 +335,10 @@ struct cpuidle_cstates *build_cstate_info(int nrcpus) c->avg_time = 0.; c->max_time = 0.; c->min_time = DBL_MAX; - c->duration = 0.; c->target_residency = cpuidle_get_target_residency(cpu, i); + c->duration_opps = NULL; + c->nropp = 0; } } return cstates; @@ -739,6 +740,7 @@ static int cstate_begin(double time, int state, struct cpuidle_cstates *cstates) memset(data + nrdata, 0, sizeof(*data));
data[nrdata].begin = time; + data[nrdata].speed = cstates->current_speed;
cstate->data = data; cstates->cstate_max = MAX(cstates->cstate_max, state); @@ -747,7 +749,34 @@ static int cstate_begin(double time, int state, struct cpuidle_cstates *cstates) return 0; }
-static void cstate_end(double time, struct cpuidle_cstates *cstates) +static int update_duration_opp(struct cpuidle_cstate *cstate, unsigned int freq, double duration) +{ + struct duration_opp *opps = cstate->duration_opps; + int i; + + for (i = 0; i < cstate->nropp; i++) { + if (opps[i].speed == freq) { + opps[i].duration += duration; + return 0; + } + } + + opps = realloc(cstate->duration_opps, sizeof(*opps) * (cstate->nropp + 1)); + if (!opps) { + free(cstate->duration_opps); + return error(__func__); + } + + cstate->duration_opps = opps; + memset(opps + cstate->nropp, 0, sizeof(*opps)); + opps[cstate->nropp].speed = freq; + opps[cstate->nropp].duration += duration; + cstate->nropp++; + + return 0; +} + +static int cstate_end(double time, struct cpuidle_cstates *cstates) { int last_cstate = cstates->current_cstate; struct cpuidle_cstate *cstate = &cstates->cstate[last_cstate]; @@ -792,12 +821,17 @@ static void cstate_end(double time, struct cpuidle_cstates *cstates) cstate->max_time = MAX(cstate->max_time, data->duration); cstate->avg_time = AVG(cstate->avg_time, data->duration, cstate->nrdata + 1); + + if (update_duration_opp(cstate, data->speed, data->duration) < 0) + return error(__func__); + cstate->duration += data->duration; cstate->nrdata++;
skip_entry: /* CPU is no longer idle */ cstates->current_cstate = -1; + return 0; }
int record_cstate_event(struct cpuidle_cstates *cstates, @@ -810,13 +844,35 @@ int record_cstate_event(struct cpuidle_cstates *cstates, return 0;
if (cstates->current_cstate != -1) - cstate_end(time, cstates); + ret = cstate_end(time, cstates); if (state != -1) ret = cstate_begin(time, state, cstates);
return ret; }
+static void core_update_current_speed(struct cpu_core *core) +{ + struct cpu_cpu *cpu; + unsigned int max_speed = 0; + + core_for_each_cpu(cpu, core) + max_speed = MAX(max_speed, cpu->cstates->current_speed); + + core->cstates->current_speed = max_speed; +} + +static void cluster_update_current_speed(struct cpu_physical *clust) +{ + struct cpu_cpu *cpu; + unsigned int max_speed = 0; + + cluster_for_each_cpu(cpu, clust) + max_speed = MAX(max_speed, cpu->cstates->current_speed); + + clust->cstates->current_speed = max_speed; +} + int store_data(double time, int state, int cpu, struct cpuidle_datas *datas) { @@ -843,11 +899,13 @@ int store_data(double time, int state, int cpu, /* Update core and cluster */ aff_core = cpu_to_core(cpu, datas->topo); state = core_get_least_cstate(aff_core); + core_update_current_speed(aff_core); if (record_cstate_event(aff_core->cstates, time, state) == -1) return -1;
aff_cluster = cpu_to_cluster(cpu, datas->topo); state = cluster_get_least_cstate(aff_cluster); + cluster_update_current_speed(aff_cluster); if (record_cstate_event(aff_cluster->cstates, time,state) == -1) return -1;
diff --git a/idlestat.h b/idlestat.h index e030f6a..2eef6c9 100644 --- a/idlestat.h +++ b/idlestat.h @@ -48,11 +48,17 @@ "/sys/devices/system/cpu/cpu%d/cpufreq/cpuinfo_cur_freq"
struct cpuidle_data { + unsigned int speed; double begin; double end; double duration; };
+struct duration_opp { + unsigned int speed; + double duration; +}; + struct cpuidle_cstate { char *name; struct cpuidle_data *data; @@ -62,7 +68,9 @@ struct cpuidle_cstate { double avg_time; double max_time; double min_time; + struct duration_opp *duration_opps; double duration; + int nropp; int target_residency; /* -1 if not available */ };
@@ -83,6 +91,7 @@ struct cpuidle_cstates { struct cpuidle_cstate cstate[MAXCSTATE]; struct wakeup_info wakeinfo; int current_cstate; + unsigned int current_speed; int cstate_max; struct wakeup_irq *wakeirq; enum {as_expected, too_long, too_short} actual_residency; @@ -157,6 +166,7 @@ struct pstate_energy_info {
struct cstate_energy_info { char cstate_name[NAMELEN]; + unsigned int speed; unsigned int cluster_idle_power; unsigned int core_idle_power; double cluster_duration; diff --git a/trace_ops.h b/trace_ops.h index 6d097ff..41258b6 100644 --- a/trace_ops.h +++ b/trace_ops.h @@ -46,5 +46,4 @@ extern void load_text_data_lines(FILE *f, char *buffer, struct cpuidle_datas *da * tracetype_name ## _trace_ptr = &tracetype_name##_trace_ops
extern const struct trace_ops *trace_ops_head; - #endif diff --git a/tracefile_idlestat.c b/tracefile_idlestat.c index 3430693..79f8578 100644 --- a/tracefile_idlestat.c +++ b/tracefile_idlestat.c @@ -108,6 +108,8 @@ static struct cpuidle_cstates *load_and_build_cstate_info(FILE* f, char *buffer, c->max_time = 0.; c->min_time = DBL_MAX; c->duration = 0.; + c->duration_opps = NULL; + c->nropp = 0; c->target_residency = residency; } fgets(buffer, BUFSIZE, f); @@ -146,6 +148,8 @@ int load_text_data_line(char *buffer, struct cpuidle_datas *datas, char *format, "be wrong.\n"); return -1; } + + datas->cstates[cpu].current_speed = freq; return cpu_change_pstate(datas, cpu, freq, time); }