Hi,
On Sat, Oct 4, 2014 at 7:33 AM, pi-cheng.chen pi-cheng.chen@linaro.org wrote:
Initialize struct cpufreq_pstates with initial P-state of CPUs and allocate struct cpufreq_pstate dynamically when parsing trace file to solve the issue caused by missing "scaling_avaialable_freqs" attr when using intel_pstate driver.
Changes v2 to v3: Initialize struct cpufreq_pstates with initial P-state of all CPUs and beginning timestamp before trace acquisition and close all P-states with ending timestamp
hanges v1 to v2: Sort the cpufreq_pstate list when parsing events
Signed-off-by: Pi-Cheng Chen pi-cheng.chen@linaro.org
idlestat.c | 259 ++++++++++++++++++++++++++++++++++++++++++++----------------- idlestat.h | 7 ++ trace.h | 1 + 3 files changed, 195 insertions(+), 72 deletions(-)
diff --git a/idlestat.c b/idlestat.c index da615cb..8230067 100644 --- a/idlestat.c +++ b/idlestat.c @@ -536,6 +536,121 @@ static struct cpuidle_cstates *build_cstate_info(int nrcpus) return cstates; }
+#define TRACE_STAT_FORMAT "%*[^:]:%lf"
+static double get_trace_ts(void) +{
FILE *f;
double ts;
f = fopen(TRACE_STAT_FILE, "r");
if (!f)
return -1;
while (fgets(buffer, BUFSIZE, f)) {
if (!strstr(buffer, "now ts"))
continue;
if (!sscanf(buffer, TRACE_STAT_FORMAT, &ts))
ts = -1;
break;
}
fclose(f);
return ts;
+}
+static void release_init_pstates(struct init_pstates *init) +{
free(init->freqs);
free(init);
+}
+static struct init_pstates *build_init_pstates(void) +{
struct init_pstates *init;
int nr_cpus, cpu;
unsigned int *freqs;
nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
if (nr_cpus < 0)
return NULL;
init = malloc(sizeof(*init));
if (!init)
return NULL;
Note, init->freqs is uninitialized and used below in error handling path.
freqs = calloc(nr_cpus, sizeof(*freqs));
if (!freqs) {
free(init);
return NULL;
}
memset(freqs, 0, sizeof(*freqs) * nr_cpus);
(This is useless, either we assign every element in the loop or we release the memory.)
for (cpu = 0; cpu < nr_cpus; cpu++) {
char *fpath;
unsigned int *freq = &(freqs[cpu]);
if (asprintf(&fpath, CPUFREQ_CURFREQ_PATH_FORMAT, cpu) < 0) {
release_init_pstates(init);
The uninitialized variable is used within the called function. Plus freqs does not get released.
return NULL;
}
if (read_int(fpath, (int *)freq))
*freq = 0;
free(fpath);
}
init->nr_cpus = nr_cpus;
init->freqs = freqs;
Moving these two assignments before the loop would fix the above issue without leaking memory. Overall this is a minor issue, but lets fix it anyway.
return init;
+}
+/**
- alloc_pstate - allocate, sort, and initialize pstate struct
- to maintain statistics of P-state transitions
- @pstates: per-CPU P-state statistics struct
- @freq: frequency for which the newly pstate is allocated
- Return: the index of the newly allocated pstate struct
- */
+static int alloc_pstate(struct cpufreq_pstates *pstates, unsigned int freq) +{
struct cpufreq_pstate *pstate, *tmp;
int nrfreq, i, next = 0;
pstate = pstates->pstate;
nrfreq = pstates->max;
tmp = realloc(pstate, sizeof(*pstate) * (nrfreq + 1));
if (!tmp) {
perror("realloc pstate");
return -1;
}
pstate = tmp;
pstates->pstate = tmp;
pstates->max = nrfreq + 1;
for (i = 0; i < nrfreq && freq <= pstate[i].freq; i++)
;
next = i;
for (i = nrfreq; i > next && i > 0; i--) {
pstate[i] = pstate[i - 1];
pstate[i].id = i;
pstates->current = (pstates->current == i - 1)?
i : pstates->current;
}
The above itches me a bit. Readability would be better as:
memmove(pstate+next+1, pstate+next, sizeof(*pstate) * (nrfreq-next)); for (i = nrfreq, i > next; i--) pstate[i].id = i; if (pstates->current >= next) pstates->current++;
It's ok to move a memory block of 0 bytes. Again, minor.
pstate[next].id = next;
pstate[next].freq = freq;
pstate[next].count = 0;
pstate[next].min_time = DBL_MAX;
pstate[next].max_time = 0;
pstate[next].avg_time = 0;
pstate[next].duration = 0;
return next;
+}
/**
- release_pstate_info - free all P-state related structs
- @pstates: per-cpu array of P-state statistics structs
@@ -560,14 +675,16 @@ static void release_pstate_info(struct cpufreq_pstates *pstates, int nrcpus) return; }
-/**
- build_pstate_info - parse cpufreq sysfs entries and build per-CPU
- structs to maintain statistics of P-state transitions
+/* build_pstate_info - allocate and initialize per-CPU structs to
- maintain statistics of P-state transitions
- @nrcpus: number of CPUs
- @initp: initial P-state of CPUs before trace acquistion
*/
- @start_ts: timestamp when trace acquisition started
- Return: per-CPU array of structs (success) or NULL (error)
-static struct cpufreq_pstates *build_pstate_info(int nrcpus) +static struct cpufreq_pstates *build_pstate_info(int nrcpus,
struct init_pstates *initp, double start_ts)
{ int cpu; struct cpufreq_pstates *pstates; @@ -577,67 +694,28 @@ static struct cpufreq_pstates *build_pstate_info(int nrcpus) return NULL; memset(pstates, 0, sizeof(*pstates) * nrcpus);
for (cpu = 0; cpu < nrcpus; cpu++) {
struct cpufreq_pstate *pstate;
int nrfreq;
char *fpath, *freq, line[256];
FILE *sc_av_freq;
if (asprintf(&fpath, CPUFREQ_AVFREQ_PATH_FORMAT, cpu) < 0)
goto clean_exit;
/* read scaling_available_frequencies for the CPU */
sc_av_freq = fopen(fpath, "r");
free(fpath);
if (!sc_av_freq) {
fprintf(stderr, "warning: P-states not supported for "
"CPU%d\n", cpu);
continue;
}
freq = fgets(line, sizeof(line)/sizeof(line[0]), sc_av_freq);
fclose(sc_av_freq);
if (!freq) {
/* unlikely to be here, but just in case... */
fprintf(stderr, "warning: P-state info not found for "
"CPU%d\n", cpu);
continue;
}
if (initp)
assert(initp->nr_cpus == nrcpus);
/* tokenize line and populate each frequency */
nrfreq = 0;
pstate = NULL;
while ((freq = strtok(freq, "\n ")) != NULL) {
struct cpufreq_pstate *tmp = realloc(pstate, sizeof(*pstate) * (nrfreq+1));
if (!tmp)
goto clean_exit;
pstate = tmp;
/* initialize pstate record */
pstate[nrfreq].id = nrfreq;
pstate[nrfreq].freq = atol(freq);
pstate[nrfreq].count = 0;
pstate[nrfreq].min_time = DBL_MAX;
pstate[nrfreq].max_time = 0.;
pstate[nrfreq].avg_time = 0.;
pstate[nrfreq].duration = 0.;
nrfreq++;
freq = NULL;
for (cpu = 0; cpu < nrcpus; cpu++) {
struct cpufreq_pstates *ps = &(pstates[cpu]);
ps->pstate = NULL;
ps->max = 0;
ps->current = -1; /* unknown */
ps->idle = -1; /* unknown */
ps->time_enter = 0.;
ps->time_exit = 0.;
if (initp && initp->freqs[cpu] > 0 && start_ts > 0) {
ps->current = alloc_pstate(ps, initp->freqs[cpu]);
assert(ps->current >= 0);
ps->time_enter = start_ts;
ps->idle = 0; }
This block is not solving the problem completely, see below.
Note, setting ps->idle to 0 is not really fixing the bug in cpu_change_pstate calling open_next_pstate with idle set to -1. I assume you added that line because of the warnings generated, right? (It is a separate issue, don't fix that bug in this patch. But we shouldn't really hide it either.)
/* now populate cpufreq_pstates for this CPU */
pstates[cpu].pstate = pstate;
pstates[cpu].max = nrfreq;
pstates[cpu].current = -1; /* unknown */
pstates[cpu].idle = -1; /* unknown */
pstates[cpu].time_enter = 0.;
pstates[cpu].time_exit = 0.; } return pstates;
-clean_exit:
release_pstate_info(pstates, nrcpus);
return NULL;
}
static int get_current_pstate(struct cpuidle_datas *datas, int cpu, @@ -712,6 +790,9 @@ static void cpu_change_pstate(struct cpuidle_datas *datas, int cpu,
cur = get_current_pstate(datas, cpu, &ps, &p); next = freq_to_pstate_index(ps, freq);
if (next < 0)
next = alloc_pstate(ps, freq);
assert(next >= 0); switch (cur) { case 1:
@@ -742,6 +823,19 @@ static void cpu_change_pstate(struct cpuidle_datas *datas, int cpu, } }
+static void cpu_close_all_pstate(struct cpuidle_datas *datas, double time) +{
struct cpufreq_pstates *ps;
struct cpufreq_pstate *p;
int cpu, cur;
for (cpu = 0; cpu < datas->nrcpus; cpu++) {
cur = get_current_pstate(datas, cpu, &ps, &p);
if (p && !cur && time > 0)
close_current_pstate(ps, time);
}
+}
Not needed / should not be created, see below. Not solving the problem completely.
static void cpu_pstate_idle(struct cpuidle_datas *datas, int cpu, double time) { struct cpufreq_pstates *ps = &(datas->pstates[cpu]); @@ -763,7 +857,6 @@ static int store_data(double time, int state, int cpu, struct cpuidle_datas *datas, int count) { struct cpuidle_cstates *cstates = &datas->cstates[cpu];
struct cpufreq_pstate *pstate = datas->pstates[cpu].pstate; struct cpuidle_cstate *cstate; struct cpuidle_data *data, *tmp; int nrdata, last_cstate = cstates->last_cstate;
@@ -826,9 +919,8 @@ static int store_data(double time, int state, int cpu, /* need indication if CPU is idle or not */ cstates->last_cstate = -1;
/* update P-state stats if supported */
if (pstate)
cpu_pstate_running(datas, cpu, time);
/* update P-state stats */
cpu_pstate_running(datas, cpu, time); return 0; }
@@ -846,9 +938,8 @@ static int store_data(double time, int state, int cpu, cstates->cstate_max = MAX(cstates->cstate_max, state); cstates->last_cstate = state; cstates->wakeirq = NULL;
/* update P-state stats if supported */
if (pstate)
cpu_pstate_idle(datas, cpu, time);
/* update P-state stats*/
cpu_pstate_idle(datas, cpu, time); return 0;
} @@ -932,7 +1023,8 @@ static int get_wakeup_irq(struct cpuidle_datas *datas, char *buffer, int count) return -1; }
-static struct cpuidle_datas *idlestat_load(struct program_options *options) +static struct cpuidle_datas *idlestat_load(struct program_options *options,
struct init_pstates *initp, double start_ts, double end_ts)
Now this is my main objection. Instead of hacking into load, you need to tweak the store. Your changes do not work as intended with --report, as initp, start_ts and end_ts will not be properly set without --trace.
Instead of modifying load and adding close_all_pstates and such, hack the store so that after storing the topology, we emit initial pstate change for all CPUs at start_ts to frequency specified by initp. Then, after the actual trace entries have been copied, store final pstate change into MAXINT or some other value that is not a real frequency. Then things should work equally for --trace and --report (and compare).
While at it, why don't we break this change into 2 separate patches. The dynamic pstate allocation, which in principle is already sound (with or without changing the minor issues I mentioned) and then the part where we try to get the leading and trailing pstate information into the report, which needs to be changed from faking stuff in load into writing the data into trace properly.
Tuukka
{ FILE *f; unsigned int state = 0, freq = 0, cpu = 0, nrcpus = 0; @@ -988,14 +1080,13 @@ static struct cpuidle_datas *idlestat_load(struct program_options *options) return ptrerror("build_cstate_info: out of memory"); }
datas->pstates = build_pstate_info(nrcpus);
datas->pstates = build_pstate_info(nrcpus, initp, start_ts); if (!datas->pstates) { free(datas->cstates); free(datas); fclose(f); return ptrerror("build_pstate_info: out of memory"); }
datas->nrcpus = nrcpus; /* read topology information */
@@ -1018,7 +1109,6 @@ static struct cpuidle_datas *idlestat_load(struct program_options *options) } else if (strstr(buffer, "cpu_frequency")) { assert(sscanf(buffer, TRACE_FORMAT, &time, &freq, &cpu) == 3);
assert(datas->pstates[cpu].pstate != NULL); cpu_change_pstate(datas, cpu, freq, time); count++; continue;
@@ -1031,6 +1121,9 @@ static struct cpuidle_datas *idlestat_load(struct program_options *options)
fclose(f);
/* close all p-state with end timestamp */
cpu_close_all_pstate(datas, end_ts);
fprintf(stderr, "Log is %lf secs long with %zd events\n", end - begin, count);
@@ -1481,7 +1574,9 @@ int main(int argc, char *argv[], char *const envp[]) { struct cpuidle_datas *datas; struct program_options options;
struct init_pstates *initp = NULL; int args;
double start_ts, end_ts; args = getoptions(argc, argv, &options); if (args <= 0)
@@ -1527,6 +1622,14 @@ int main(int argc, char *argv[], char *const envp[]) if (idlestat_flush_trace()) return -1;
/* Get current P-state of all CPUs */
if (options.display & FREQUENCY_DISPLAY)
initp = build_init_pstates();
/* Get timestamp before trace acquisition */
if (options.display & FREQUENCY_DISPLAY)
start_ts = get_trace_ts();
/* Start the recording */ if (idlestat_trace_enable(true)) return -1;
@@ -1549,6 +1652,17 @@ int main(int argc, char *argv[], char *const envp[]) if (idlestat_trace_enable(false)) return -1;
/* Get timestamp after trace acquisition */
if (options.display & FREQUENCY_DISPLAY)
end_ts = get_trace_ts();
/* Emit warning messages when P-state info might be partial */
if (options.display & FREQUENCY_DISPLAY &&
(!initp || start_ts < 0 || end_ts < 0))
fprintf(stderr, "Unable to get initial P-state,"
" beginning timestamp, or ending timestamp!\n"
"P-state statistics data might be partial!\n");
/* At this point we should have some spurious wake up * at the beginning of the traces and at the end (wake * up all cpus and timer expiration for the timer
@@ -1559,7 +1673,7 @@ int main(int argc, char *argv[], char *const envp[]) }
/* Load the idle states information */
datas = idlestat_load(&options);
datas = idlestat_load(&options, initp, start_ts, end_ts); if (!datas) return 1;
@@ -1590,6 +1704,7 @@ int main(int argc, char *argv[], char *const envp[]) } }
release_init_pstates(initp); release_cpu_topo_cstates(); release_cpu_topo_info(); release_pstate_info(datas->pstates, datas->nrcpus);
diff --git a/idlestat.h b/idlestat.h index 735f0fe..39ba4d7 100644 --- a/idlestat.h +++ b/idlestat.h @@ -41,6 +41,8 @@ "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/residency" #define CPUFREQ_AVFREQ_PATH_FORMAT \ "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_available_frequencies" +#define CPUFREQ_CURFREQ_PATH_FORMAT \
"/sys/devices/system/cpu/cpu%d/cpufreq/cpuinfo_cur_freq"
#define CPUIDLE_STATENAME_PATH_FORMAT \ "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name"
@@ -63,6 +65,11 @@ struct cpuidle_cstate { int target_residency; /* -1 if not available */ };
+struct init_pstates {
int nr_cpus;
unsigned int *freqs;
+};
enum IRQ_TYPE { HARD_IRQ = 0, IPI_IRQ, diff --git a/trace.h b/trace.h index bef6703..90b4a6a 100644 --- a/trace.h +++ b/trace.h @@ -33,6 +33,7 @@ #define TRACE_EVENT_PATH TRACE_PATH "/events/enable" #define TRACE_FREE TRACE_PATH "/free_buffer" #define TRACE_FILE TRACE_PATH "/trace" +#define TRACE_STAT_FILE TRACE_PATH "/per_cpu/cpu0/stats" #define TRACE_IDLE_NRHITS_PER_SEC 10000 #define TRACE_IDLE_LENGTH 196
#define TRACE_CPUFREQ_NRHITS_PER_SEC 100
1.9.1
linaro-dev mailing list linaro-dev@lists.linaro.org http://lists.linaro.org/mailman/listinfo/linaro-dev