linaro-dev January 2013

linaro-dev@lists.linaro.org

82 participants
82 discussions

[powerdebug,1/1] clock: support common clock framework

by Sanjay Singh Rawat

Signed-off-by: Sanjay Singh Rawat <sanjay.rawat(a)linaro.com> --- clocks.c | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 56 insertions(+), 13 deletions(-) diff --git a/clocks.c b/clocks.c index 2611a0d..95acf57 100644 --- a/clocks.c +++ b/clocks.c @@ -42,9 +42,19 @@ struct clock_info { int usecount; bool expanded; char *prefix; + int preparecount; + int enablecount; + int notifiercount; } *clocks_info; +enum clock_fw_type{ + CCF, /* common clock framework */ + OCF, /* old clock framework */ + MAX, +}; + static struct tree *clock_tree = NULL; +static int clock_fw; static int locate_debugfs(char *clk_path) { @@ -144,9 +154,18 @@ static inline int read_clock_cb(struct tree *t, void *data) { struct clock_info *clk = t->private; - file_read_value(t->path, "flags", "%x", &clk->flags); - file_read_value(t->path, "rate", "%d", &clk->rate); - file_read_value(t->path, "usecount", "%d", &clk->usecount); + if(clock_fw == CCF) { + file_read_value(t->path, "clk_flags", "%x", &clk->flags); + file_read_value(t->path, "clk_rate", "%d", &clk->rate); + file_read_value(t->path, "clk_prepare_count", "%d", &clk->preparecount); + file_read_value(t->path, "clk_enable_count", "%d", &clk->enablecount); + file_read_value(t->path, "clk_notifier_count", "%d", &clk->notifiercount); + } + else { + file_read_value(t->path, "flags", "%x", &clk->flags); + file_read_value(t->path, "rate", "%d", &clk->rate); + file_read_value(t->path, "usecount", "%d", &clk->usecount); + } return 0; } @@ -206,9 +225,17 @@ static char *clock_line(struct tree *t) if (asprintf(&clkrate, "%d%s", rate, clkunit) < 0) goto free_clkname; - if (asprintf(&clkline, "%-55s 0x%-16x %-12s %-9d %-8d", clkname, - clk->flags, clkrate, clk->usecount, t->nrchild) < 0) - goto free_clkrate; + if(clock_fw == CCF) { + if (asprintf(&clkline, "%-35s 0x%-8x %-12s %-10d %-11d %-15d %-14d %-10d", + clkname, clk->flags, clkrate, clk->usecount, t->nrchild, + clk->preparecount, clk->enablecount, clk->notifiercount) < 0) + goto free_clkrate; + } + else { + if (asprintf(&clkline, "%-55s 0x%-16x %-12s %-9d %-8d", + clkname, clk->flags, clkrate, clk->usecount, t->nrchild) < 0) + goto free_clkrate; + } free_clkrate: free(clkrate); @@ -259,9 +286,17 @@ static int clock_print_header(void) char *buf; int ret; - if (asprintf(&buf, "%-55s %-16s %-12s %-9s %-8s", + if(clock_fw == CCF) { + if (asprintf(&buf, "%-35s %-10s %-12s %-10s %-11s %-15s %-14s %-14s", + "Name", "Flags", "Rate", "Usecount", "Children", "Prepare_Count", + "Enable_Count", "Notifier_Count") < 0) + return -1; + } + else { + if (asprintf(&buf, "%-55s %-16s %-12s %-9s %-8s", "Name", "Flags", "Rate", "Usecount", "Children") < 0) return -1; + } ret = display_column_name(buf); @@ -384,17 +419,25 @@ static struct display_ops clock_ops = { */ int clock_init(void) { - char clk_dir_path[PATH_MAX]; + char clk_dir_path[MAX+1][PATH_MAX]; - if (locate_debugfs(clk_dir_path)) + if (locate_debugfs(clk_dir_path[CCF]) || locate_debugfs(clk_dir_path[OCF])) return -1; - sprintf(clk_dir_path, "%s/clock", clk_dir_path); - - if (access(clk_dir_path, F_OK)) + sprintf(clk_dir_path[CCF], "%s/clk", clk_dir_path[CCF]); + sprintf(clk_dir_path[OCF], "%s/clock", clk_dir_path[OCF]); + if (!access(clk_dir_path[CCF], F_OK)) { + clock_fw = CCF; + strcpy(clk_dir_path[MAX],clk_dir_path[CCF]); + } + else if(!access(clk_dir_path[OCF], F_OK)) { + clock_fw = OCF; + strcpy(clk_dir_path[MAX],clk_dir_path[OCF]); + } + else return -1; - clock_tree = tree_load(clk_dir_path, NULL, false); + clock_tree = tree_load(clk_dir_path[MAX], NULL, false); if (!clock_tree) return -1; -- 1.7.9.5

12 years, 9 months

big.LITTLE MP status Jan 25, 2013

by David Zinman

https://wiki.linaro.org/projects/big.LITTLE.MP The big.LITTLE MP Project Plan and project documents are currently being updated. Roadmap Cards ============== Improving HMP Linux scheduling - http://cards.linaro.org/browse/CARD-190 Version 14 of big.LITTLE MP has been pushed. Updates: ------- - Rebased over 3.8-rc2 - per-task-load-average-v3-merged dropped as its already present in 3.8-rc2 - sched-pack-small-tasks-v1-fixed and sched-pack-small-tasks-v1-arm dropped, as sched-pack-small-tasks-v2 contains all required patches. Even sched-pack-small-tasks-v2 is dropped at last moment due to some issues. - Few patches from arm-multi_pmu_v2 dropped as they are already mainlined. - Stats: Total distinct patches: 37 - New Patches: cpufreq-fixes-v2 (5) Details here: http://lists.linaro.org/pipermail/linaro-dev/2013-January/015037.html Blueprints =========== Big.LITTLE MP blueprints are in the process of being updated to reflect the upcoming development iteration. https://blueprints.launchpad.net/linaro-android/+spec/cpu-hot-plug-latency https://blueprints.launchpad.net/linaro-power-kernel/+spec/multi-cluster-cp… https://blueprints.launchpad.net/linaro-power-kernel/+spec/sched-cooperatio… https://blueprints.launchpad.net/linaro-power-kernel/+spec/timer-workqueue-… https://blueprints.launchpad.net/linaro-power-kernel/+spec/power-aware-sche… Other Efforts ============= The big.LITTLE MP team has just wrapped up a sprint in Cambridge. Items discussed were benchmarking, deliverables, quality assurance, configuration management and how Linaro will accomplish on going support for successive Android kernels. More on these items as they develop. QA === QA statistics will be available in future updates. -- David Zinman, Project Manager Linaro.org | Open source software for ARM SoCs

12 years, 9 months

[PATCH Resend 0/3] sched: fix nr_busy_cpus

by Vincent Guittot

The nr_busy_cpus field of the sched_group_power is sometime different from 0 whereas the platform is fully idle. This serie fixes 3 use cases: - when the SCHED softirq is raised on an idle core for idle load balance but the platform doesn't go out of the cpuidle state - when some CPUs enter idle state while booting all CPUs - when a CPU is unplug and/or replug Vincent Guittot (3): sched: fix nr_busy_cpus with coupled cpuidle sched: fix init NOHZ_IDLE flag sched: fix update NOHZ_IDLE flag kernel/sched/core.c | 1 + kernel/sched/fair.c | 2 +- kernel/time/tick-sched.c | 2 ++ 3 files changed, 4 insertions(+), 1 deletion(-) -- 1.7.9.5

12 years, 9 months

Cheap Quad-A9 board

by Renato Golin Linaro

Anyone seen this? http://www.hardkernel.com/renewal_2011/products/prdt_info.php It's cheaper than a Pandaboard with a quad-core and 2GB or RAM and ridiculously small. That would probably get my LLVM builds under 1h... But it seems too good to be true, does any one have experience with it? cheers, --renato

12 years, 9 months

Re: [ RFC patch 0/4]: use runnable load avg in cfs balance instead of instant load

by Viresh Kumar

On 24 January 2013 09:00, Alex Shi <alex.shi(a)intel.com> wrote: > This patchset can be used, but causes burst waking benchmark aim9 drop 5~7% > on my 2 sockets machine. The reason is too light runnable load in early stage > of waked tasks cause imbalance in balancing. > > So, it is immature and just a reference for guys who want to go gurther. Pushed as runnable-load-avg-in-load-balance-v1-resent at: http://git.linaro.org/gitweb?p=arm/big.LITTLE/mp.git;a=summary

12 years, 9 months

Re: [patch v4 0/18] sched: simplified fork, release load avg and power awareness scheduling

by Viresh Kumar

On 24 January 2013 08:36, Alex Shi <alex.shi(a)intel.com> wrote: > Since the runnable info needs 345ms to accumulate, balancing > doesn't do well for many tasks burst waking. After talking with Mike > Galbraith, we are agree to just use runnable avg in power friendly > scheduling and keep current instant load in performance scheduling for > low latency. > > So the biggest change in this version is removing runnable load avg in > balance and just using runnable data in power balance. Pushed as power-aware-scheduling-v4 at: http://git.linaro.org/gitweb?p=arm/big.LITTLE/mp.git;a=summary

12 years, 9 months

Test Result Summary of Calendar Week 4, 2013 for Linux Linaro ubuntu Quantal.

by Botao Sun

Calendar Week 4, 2013: Here is test result summary for Linux Linaro ubuntu Quantal image on following boards: 1) ARM Versatile Express A9; 2) Samsung Origen; 3) TI Panda 4430; 4) TI Panda 4460; 5) ST Ericsson Snowball. Synopsis: Snowball now can boot into serial console successfully; Device Tree is unavailable in all images, and no Internet connection on Samsung Origen board. 1. ARM Versatile Express A9 + Linux Linaro Quantal (Column H): https://docs.google.com/a/linaro.org/spreadsheet/ccc?key=0AroPySpr4FnEdFNmV… It keeps exactly same status as last test result: only "Halt" & "Device Tree" test failed, all other features work well. 2. Samsung Origen + Linux Linaro Quantal (Column H): https://docs.google.com/a/linaro.org/spreadsheet/ccc?key=0AroPySpr4FnEdEowN… Device Tree is unavailable this week, also no Internet connection. "Halt" works well. 3. TI Panda 4430 + Linux Linaro Quantal (Column H): https://docs.google.com/a/linaro.org/spreadsheet/ccc?key=0AroPySpr4FnEdEwwZ… Only Device Tree is unavailable, all other features work well. 4. TI Panda 4460 + Linux Linaro Quantal (Column H): https://docs.google.com/a/linaro.org/spreadsheet/ccc?key=0AroPySpr4FnEdEwwZ… Same as TI Panda 4430, only Device Tree is unavailable, others work well. 5. ST Ericsson Snowball + Linux Linaro Quantal (Column H): https://docs.google.com/a/linaro.org/spreadsheet/ccc?key=0AroPySpr4FnEdFJ4X… Now board can boot into serial console successfully, but many features are unavailable, like HDMI, reboot, halt, Ethernet. For the previous week test summary (Calendar week 3), please refer to attachment. Thank you. Best Regards Botao Sun

12 years, 9 months

sched: Consequences of integrating the Per Entity Load Tracking Metric into the Load Balancer

by Preeti U Murthy

Hi everyone, I have been looking at how different workloads react when the per entity load tracking metric is integrated into the load balancer and what are the possible reasons for it. I had posted the integration patch earlier: https://lkml.org/lkml/2012/11/15/391 Essentially what I am doing is: 1.I have disabled CONFIG_FAIR_GROUP_SCHED to make the analysis simple 2.I have replaced cfs_rq->load.weight in weighted_cpuload() with cfs.runnable_load_avg,the active load tracking metric. 3.I have replaced se.load.weight in task_h_load() with se.load.avg.contrib,the per entity load tracking metric. 4.The load balancer will end up using these metrics. After conducting experiments on several workloads I found out that the performance of the workloads with the above integration would neither improve nor deteriorate.And this observation was consistent. Ideally the performance should have improved considering,that the metric does better tracking of load. Let me explain with a simple example as to why we should see a performance improvement ideally:Consider 2 80% tasks and 1 40% task. With integration: ---------------- 40% 80% 40% cpu1 cpu2 The above will be the scenario when the tasks fork initially.And this is a perfectly balanced system,hence no more load balancing.And proper distribution of loads on the cpu. Without integration ------------------- 40% 40% 80% 40% 80% 40% cpu1 cpu2 OR cpu1 cpu2 Because the view is that all the tasks as having the same load.The load balancer could ping pong tasks between these two situations. When I performed this experiment,I did not see an improvement in the performance though in the former case.On further observation I found that the following was actually happening. With integration ---------------- Initially 40% task sleeps 40% task wakes up and select_idle_sibling() decides to wake it up on cpu1 40% -> -> 40% 80% 40% 80% 40% 80% 40% cpu1 cpu2 cpu1 cpu2 cpu1 cpu2 This makes load balance trigger movement of 40% from cpu1 back to cpu2.Hence the stability that the load balancer was trying to achieve is gone.Hence the culprit boils down to select_idle_sibling.How is it the culprit and how is it hindering performance of the workloads? *What is the way ahead with the per entity load tracking metric in the load balancer then?* In replies to a post by Paul in https://lkml.org/lkml/2012/12/6/105, he mentions the following: "It is my intuition that the greatest carnage here is actually caused by wake-up load-balancing getting in the way of periodic in establishing a steady state. I suspect more mileage would result from reducing the interference wake-up load-balancing has with steady state." "The whole point of using blocked load is so that you can converge on a steady state where you don't NEED to move tasks. What disrupts this is we naturally prefer idle cpus on wake-up balance to reduce wake-up latency. I think the better answer is making these two processes load balancing() and select_idle_sibling() more co-operative." I had not realised how this would happen until I saw it happening in the above experiment. Based on what Paul explained above let us use the runnable load + the blocked load for calculating the load on a cfs runqueue rather than just the runnable load(which is what i am doing now) and see its consequence. Initially: 40% task sleeps 40% 80% 40% -> 80% 40% cpu1 cpu2 cpu1 cpu2 So initially the load on cpu1 is say 80 and on cpu2 also it is 80.Balanced.Now when 40% task sleeps,the total load on cpu2=runnable load+blocked load.which is still 80. As a consequence,firstly,during periodic load balancing the load is not moved from cpu1 to cpu2 when the 40% task sleeps.(It sees the load on cpu2 as 80 and not as 40). Hence the above scenario remains the same.On wake up,what happens? Here comes the point of making both load balancing and wake up balance(select_idle_sibling) co operative. How about we always schedule the woken up task on the prev_cpu? This seems more sensible considering load balancing considers blocked load as being a part of the load of cpu2. If we do that,we end up scheduling the 40% task back on cpu2.Back to the scenario which load balancing intended.Hence a steady state is maintained no matter what unless other tasks show up. Note that considering prev_cpu as the default cpu to run the woken up task on is possible only because we use blocked load for load balancing purposes. The above steps of using blocked load and selecting the prev_cpu as the target for the woken up task seems to me to be the next step.This could allow the load balance with the per entity load tracking metric to behave as it is supposed to without anything else disrupting it.And here i expect a performance improvement. Please do let me know your suggestions.This will greatly help take the right steps here on, in achieving the correct integration. Thank you Regards Preeti U Murthy

12 years, 9 months

OpenEmbedded runlevels empty?

by Tim Northover

Hi again, Sorry for spamming this list with what are essentially support questions, but now that bug #1099896 has been fixed, I'm trying to build an OpenEmbedded filesystem with it, following the instructions at https://wiki.linaro.org/HowTo/ARMv8/OpenEmbedded. What comes out seems amazingly close to a functional system, except that nothing gets put into runlevel 5 so none of the actual programs get started on boot (mainly sshd and a console login obviously, I could work around anything else). Has anyone seen this before or know where I should start looking? I've not really used OpenEmbedded before. Cheers. Tim. -- IMPORTANT NOTICE: The contents of this email and any attachments are confidential and may also be privileged. If you are not the intended recipient, please notify the sender immediately and do not disclose the contents to any other person, use it for any purpose, or store or copy the information in any medium. Thank you.

12 years, 9 months

[GIT PULL]: big LITTLE MP master v14

by Viresh Kumar

Hi Andrey, I know you have already pulled in V14, but it has got changed now. So, please repull it. (Branch: sched-pack-small-tasks-v2 is dropped now) Updates: ------- - Rebased over 3.8-rc2 - per-task-load-average-v3-merged dropped as its already present in 3.8-rc2 - sched-pack-small-tasks-v1-fixed and sched-pack-small-tasks-v1-arm dropped, as sched-pack-small-tasks-v2 contains all required patches. Even sched-pack-small-tasks-v2 is dropped at last moment due to some issues. - Few patches from arm-multi_pmu_v2 dropped as they are already mainlined. - Stats: Total distinct patches: 37 - New Patches: cpufreq-fixes-v2 (5) ------------x-----------------x-------------------- The following changes since commit d1c3ed669a2d452cacfb48c2d171a1f364dae2ed: Linux 3.8-rc2 (2013-01-02 18:13:21 -0800) are available in the git repository at: git://git.linaro.org/arm/big.LITTLE/mp.git big-LITTLE-MP-master-v14 for you to fetch changes up to 56a331912166f0f618f0c0cf633c87967fe487c0: Merge branches 'arm-multi_pmu_v2', 'cpufreq-fixes-v2', 'hw-bkp-v7.1-debug-v2', 'task-placement-v2-sysfs', 'misc-patches' and 'config-fragments' into big-LITTLE-MP-master-v14 (2013-01-23 17:24:03 +0530) ---------------------------------------------------------------- Chris Redpath (2): ARM: Experimental Frequency-Invariant Load Scaling Patch ARM: Fix build breakage when big.LITTLE.conf is not used. Dietmar Eggemann (2): ARM: hw_breakpoint: Check function for OS Save and Restore mechanism ARM: hw_breakpoint: Debug powerdown support for self-hosted debug Jon Medhurst (1): ARM: sched: Avoid empty 'slow' HMP domain Liviu Dudau (1): linaro/configs: big-LITTLE-MP: Enable the new tunable sysfs interface by default. Lorenzo Pieralisi (1): ARM: kernel: provide cluster to logical cpu mask mapping API Morten Rasmussen (14): sched: entity load-tracking load_avg_ratio sched: Task placement for heterogeneous systems based on task load-tracking sched: Forced task migration on heterogeneous systems sched: Introduce priority-based task migration filter ARM: Add HMP scheduling support for ARM architecture ARM: sched: Use device-tree to provide fast/slow CPU list for HMP ARM: sched: Setup SCHED_HMP domains sched: Add ftrace events for entity load-tracking sched: Add HMP task migration ftrace event sched: SCHED_HMP multi-domain task migration control sched: Enable HMP priority filter by default sched: Only down migrate low priority tasks if allowed by affinity mask linaro/configs: Enable HMP priority filter by default sched: Basic global balancing support for HMP Olivier Cozette (1): ARM: Change load tracking scale using sysfs Paul Turner (1): sched: implement usage tracking Sudeep KarkadaNagesha (6): ARM: perf: replace global CPU PMU pointer with per-cpu pointers ARM: perf: register CPU PMUs with idr types ARM: perf: set cpu affinity to support multiple PMUs ARM: perf: set cpu affinity for the irqs correctly ARM: perf: remove spaces in CPU PMU names ARM: perf: save/restore pmu registers in pm notifier Thomas Gleixner (1): genirq: Add default affinity mask command line option Viresh Kumar (9): configs: Add config fragments for big LITTLE MP linaro/configs: Update big LITTLE MP fragment for task placement work config-frag/big-LITTLE: Use device-tree to provide fast/slow CPU list for HMP cpufreq: Manage only online cpus cpufreq: Notify governors when cpus are hot-[un]plugged cpufreq: Don't use cpu removed during cpufreq_driver_unregister cpufreq: Simplify __cpufreq_remove_dev() cpufreq: Simplify cpufreq_add_dev() Merge branches 'arm-multi_pmu_v2', 'cpufreq-fixes-v2', 'hw-bkp-v7.1-debug-v2', 'task-placement-v2-sysfs', 'misc-patches' and 'config-fragments' into big-LITTLE-MP-master-v14 Documentation/devicetree/bindings/arm/pmu.txt | 3 + Documentation/kernel-parameters.txt | 9 + arch/arm/Kconfig | 85 ++ arch/arm/include/asm/hw_breakpoint.h | 3 + arch/arm/include/asm/pmu.h | 12 + arch/arm/include/asm/topology.h | 34 + arch/arm/kernel/hw_breakpoint.c | 56 +- arch/arm/kernel/perf_event.c | 19 + arch/arm/kernel/perf_event_cpu.c | 117 ++- arch/arm/kernel/perf_event_v7.c | 57 +- arch/arm/kernel/topology.c | 120 +++ drivers/cpufreq/cpufreq.c | 321 ++++---- drivers/cpufreq/cpufreq_stats.c | 27 +- drivers/cpufreq/freq_table.c | 9 + include/linux/cpufreq.h | 14 +- include/linux/sched.h | 12 + include/trace/events/sched.h | 153 ++++ kernel/irq/irqdesc.c | 21 +- kernel/sched/core.c | 4 + kernel/sched/debug.c | 3 + kernel/sched/fair.c | 1021 ++++++++++++++++++++++++- kernel/sched/sched.h | 13 +- linaro/configs/big-LITTLE-MP.conf | 13 + 23 files changed, 1886 insertions(+), 240 deletions(-) create mode 100644 linaro/configs/big-LITTLE-MP.conf

12 years, 9 months

← Newer
1
2
3
4
5
6
7
8
9
Older →

2025

2024

2023

2022

2021

2020

2019

2018

2017

2016

2015

2014

2013

2012

2011

2010

linaro-dev January 2013