This patch adds cpufreq callbacks to dpm_{suspend|resume}_noirq() for handling
suspend/resume of cpufreq governors.
There are multiple problems that are fixed by this patch:
- Nishanth Menon (TI) found an interesting problem on his platform, OMAP. His board
wasn't working well with suspend/resume, as calls for removing non-boot CPUs
were turning into calls to the driver's ->target(), which then tries to play
with regulators. But the regulators and their I2C bus were already suspended, and
this resulted in a failure. Many platforms have such problems: Samsung, Tegra,
etc. They solved it with driver-specific PM notifiers in which they
disable their driver's ->target() routine.
- Lan Tianyu (Intel) & Jinhyuk Choi (Broadcom) found another issue where
tunables configuration for clusters/sockets with non-boot CPUs was getting
lost after suspend/resume, as we were notifying governors with
CPUFREQ_GOV_POLICY_EXIT on removal of the last cpu for that policy and so
deallocating memory for tunables. This is also fixed with this patch as we
don't allow any operation on Governors during suspend/resume now.
Reported-and-tested-by: Lan Tianyu <tianyu.lan(a)intel.com>
Reported-and-tested-by: Nishanth Menon <nm(a)ti.com>
Reported-by: Jinhyuk Choi <jinchoi(a)broadcom.com>
Signed-off-by: Viresh Kumar <viresh.kumar(a)linaro.org>
---
This is almost the same as patch 1/6 of the V3 version of this patchset:
https://lkml.org/lkml/2013/11/25/838
This is done to get some initial fixes for 3.13. These are already tested by
both the reporters of initial problems. Tegra/exynos/s5p will keep running their
PM notifiers until v3.14, as they are currently able to work with them.
drivers/base/power/main.c | 3 +++
drivers/cpufreq/cpufreq.c | 50 +++++++++++++++++++++++++++++++++++++++++++++++
include/linux/cpufreq.h | 8 ++++++++
3 files changed, 61 insertions(+)
diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index 1b41fca..e3219df 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -29,6 +29,7 @@
#include <linux/async.h>
#include <linux/suspend.h>
#include <trace/events/power.h>
+#include <linux/cpufreq.h>
#include <linux/cpuidle.h>
#include <linux/timer.h>
@@ -540,6 +541,7 @@ static void dpm_resume_noirq(pm_message_t state)
dpm_show_time(starttime, state, "noirq");
resume_device_irqs();
cpuidle_resume();
+ cpufreq_resume();
}
/**
@@ -955,6 +957,7 @@ static int dpm_suspend_noirq(pm_message_t state)
ktime_t starttime = ktime_get();
int error = 0;
+ cpufreq_suspend();
cpuidle_pause();
suspend_device_irqs();
mutex_lock(&dpm_list_mtx);
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 02d534d..b6c7821 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -26,6 +26,7 @@
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/slab.h>
+#include <linux/suspend.h>
#include <linux/syscore_ops.h>
#include <linux/tick.h>
#include <trace/events/power.h>
@@ -47,6 +48,9 @@ static LIST_HEAD(cpufreq_policy_list);
static DEFINE_PER_CPU(char[CPUFREQ_NAME_LEN], cpufreq_cpu_governor);
#endif
+/* Flag to suspend/resume CPUFreq governors */
+static bool cpufreq_suspended;
+
static inline bool has_target(void)
{
return cpufreq_driver->target_index || cpufreq_driver->target;
@@ -1462,6 +1466,48 @@ static struct subsys_interface cpufreq_interface = {
.remove_dev = cpufreq_remove_dev,
};
+/*
+ * Callbacks for suspending/resuming governors as some platforms can't change
+ * frequency after this point in suspend cycle. Because some of the devices
+ * (like: i2c, regulators, etc) they use for changing frequency are suspended
+ * quickly after this point.
+ */
+void cpufreq_suspend(void)
+{
+ struct cpufreq_policy *policy;
+
+ if (!has_target())
+ return;
+
+ pr_debug("%s: Suspending Governors\n", __func__);
+
+ list_for_each_entry(policy, &cpufreq_policy_list, policy_list)
+ if (__cpufreq_governor(policy, CPUFREQ_GOV_STOP))
+ pr_err("%s: Failed to stop governor for policy: %p\n",
+ __func__, policy);
+
+ cpufreq_suspended = true;
+}
+
+void cpufreq_resume(void)
+{
+ struct cpufreq_policy *policy;
+
+ if (!has_target())
+ return;
+
+ pr_debug("%s: Resuming Governors\n", __func__);
+
+ cpufreq_suspended = false;
+
+ list_for_each_entry(policy, &cpufreq_policy_list, policy_list)
+ if (__cpufreq_governor(policy, CPUFREQ_GOV_START) ||
+ __cpufreq_governor(policy,
+ CPUFREQ_GOV_LIMITS))
+ pr_err("%s: Failed to start governor for policy: %p\n",
+ __func__, policy);
+}
+
/**
* cpufreq_bp_suspend - Prepare the boot CPU for system suspend.
*
@@ -1764,6 +1810,10 @@ static int __cpufreq_governor(struct cpufreq_policy *policy,
struct cpufreq_governor *gov = NULL;
#endif
+ /* Don't start any governor operations if we are entering suspend */
+ if (cpufreq_suspended)
+ return 0;
+
if (policy->governor->max_transition_latency &&
policy->cpuinfo.transition_latency >
policy->governor->max_transition_latency) {
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index dc196bb..ee5fe9d 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -280,6 +280,14 @@ cpufreq_verify_within_cpu_limits(struct cpufreq_policy *policy)
policy->cpuinfo.max_freq);
}
+#ifdef CONFIG_CPU_FREQ
+void cpufreq_suspend(void);
+void cpufreq_resume(void);
+#else
+static inline void cpufreq_suspend(void) {}
+static inline void cpufreq_resume(void) {}
+#endif
+
/*********************************************************************
* CPUFREQ NOTIFIER INTERFACE *
*********************************************************************/
--
1.7.12.rc2.18.g61b472e
Hi Frederic,
Sorry for my ignorance about nohz_full. When we use this feature on my mobile
devices, we find that it keeps cpu0 in periodic tick mode, so the
timer interrupt rate on cpu0 is much higher than in normal nohz mode.
That causes high power consumption.
I found that you mentioned this in commit a382bf934449:
nohz: Assign timekeeping duty to a CPU outside the full dynticks range
In fact, if all full dynticks CPUs are idle, cpu0 should be safe to
go idle too. Do you have a plan or an idea for implementing this?
Otherwise, the power cost is too high to enable nohz_full on mobile platforms.
--
Thanks
Alex
This reverts commit 4725d41daea7e0cc79b3fb92af012b8cb18fccff.
This patch was dropped when the big.LITTLE switcher was submitted to the
mainline kernel because it wasn't then being used and contained a
logical flaw which meant it wouldn't have achieved what it was
attempting to do anyway. It can also produce compilation warnings in
certain configurations.
---
arch/arm/common/bL_switcher.c | 32 --------------------------------
arch/arm/include/asm/bL_switcher.h | 6 ------
2 files changed, 38 deletions(-)
diff --git a/arch/arm/common/bL_switcher.c b/arch/arm/common/bL_switcher.c
index 1883c5b..2193ae7 100644
--- a/arch/arm/common/bL_switcher.c
+++ b/arch/arm/common/bL_switcher.c
@@ -404,38 +404,6 @@ int bL_switch_request_cb(unsigned int cpu, unsigned int new_cluster_id,
EXPORT_SYMBOL_GPL(bL_switch_request_cb);
/*
- * Detach an outstanding switch request.
- *
- * The switcher will continue with the switch request in the background,
- * but the completer function will not be called.
- *
- * This may be necessary if the completer is in a kernel module which is
- * about to be unloaded.
- */
-void bL_switch_request_detach(unsigned int cpu,
- bL_switch_completion_handler completer)
-{
- struct bL_thread *t;
-
- if (cpu >= ARRAY_SIZE(bL_threads)) {
- pr_err("%s: cpu %d out of bounds\n", __func__, cpu);
- return;
- }
-
- t = &bL_threads[cpu];
-
- if (IS_ERR(t->task) || !t->task)
- return;
-
- spin_lock(&t->lock);
- if (t->completer == completer)
- t->completer = NULL;
- spin_unlock(&t->lock);
-}
-
-EXPORT_SYMBOL_GPL(bL_switch_request_detach);
-
-/*
* Activation and configuration code.
*/
diff --git a/arch/arm/include/asm/bL_switcher.h b/arch/arm/include/asm/bL_switcher.h
index 482383b..87ebcbc 100644
--- a/arch/arm/include/asm/bL_switcher.h
+++ b/arch/arm/include/asm/bL_switcher.h
@@ -40,9 +40,6 @@ static inline int bL_switch_request(unsigned int cpu, unsigned int new_cluster_i
#ifdef CONFIG_BL_SWITCHER
-void bL_switch_request_detach(unsigned int cpu,
- bL_switch_completion_handler completer);
-
int bL_switcher_register_notifier(struct notifier_block *nb);
int bL_switcher_unregister_notifier(struct notifier_block *nb);
@@ -61,9 +58,6 @@ int bL_switcher_trace_trigger(void);
int bL_switcher_get_logical_index(u32 mpidr);
#else
-static void bL_switch_request_detach(unsigned int cpu,
- bL_switch_completion_handler completer) { }
-
static inline int bL_switcher_register_notifier(struct notifier_block *nb)
{
return 0;
--
1.7.10.4
From: Arnaldo Carvalho de Melo <acme(a)ghostprotocols.net>
Hi Ingo,
The first 20 patches in this series are the same as in the previous
one, so I'm not reposting them now.
This series has the code style/constification changes you suggested in
the symbols code and then a reworked fix to the basename problem plus some
more patches not present in the previous series.
Please let me know if you find any further problems,
Best Regards,
- Arnaldo
The following changes since commit 6d65894bc028d0342829ea1e64c9e9efad571124:
tools lib traceevent: Update kvm plugin with is_writable_pte helper (2013-12-04 15:38:14 -0300)
are available in the git repository at:
git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux tags/perf-core-for-mingo
for you to fetch changes up to 1448fef40af6079de38380c3a81bcf9994a1037d:
perf unwinding: Use the per-feature check flags (2013-12-10 16:51:12 -0300)
----------------------------------------------------------------
perf/core improvements and fixes:
. Add an option in 'perf script' to print the source line number, from Adrian Hunter
. Add --header/--header-only options to 'script' and 'report', the default is not
to show the header info, but as this has been the default for some time,
leave a single line explaining how to obtain that information, from Jiri Olsa.
. Fix symoff printing in callchains in 'perf script', from Adrian Hunter.
. Assorted mmap_pages handling fixes, from Adrian Hunter.
. Fix summary percentage when processing files in 'perf trace', from David Ahern.
. Handle old kernels where the "raw_syscalls" tracepoints were called plain "syscalls",
in 'perf trace', from David Ahern.
. Several man pages typo fixes from Dongsheng Yang.
. Add '-v' option to 'perf kvm', from Dongsheng Yang.
. Make perf kvm diff support --guestmount, from Dongsheng Yang.
. Get rid of several die() calls in libtraceevent, from Namhyung Kim.
. Use basename() in a more robust way, to avoid problems related to different
system library implementations for that function, from Stephane Eranian.
. Remove open coded management of short_name_allocated member, from Adrian Hunter
. Several cleanups in the "dso" methods, constifying some parameters and
renaming some fields to clarify its purpose.
. Add per-feature check flags, fixing libunwind related build problems on some
architectures, from Jean Pihet.
Signed-off-by: Arnaldo Carvalho de Melo <acme(a)redhat.com>
----------------------------------------------------------------
Adrian Hunter (7):
perf script: Fix symoff printing in callchains
perf script: Add an option to print the source line number
perf record: Fix display of incorrect mmap pages
perf evlist: Remove unnecessary parentheses
perf evlist: Fix max mmap_pages
perf evlist: Fix mmap pages rounding to power of 2
perf symbols: Remove open coded management of short_name_allocated member
Arnaldo Carvalho de Melo (8):
perf symbols: Rename [sl]name_alloc to match the members they refer to
perf machine: Don't open code assign dso->short_name
perf symbols: Set alloc flag close to setting the long_name
perf symbols: Remove open coded management of long_name_allocated member
perf symbols: Constify dso->long_name
perf symbols: Set freed members to NULL in dso destructor
perf symbols: Constify some DSO methods parameters
perf symbols: Rename filename argument
David Ahern (2):
perf trace: Add support for syscalls vs raw_syscalls
perf trace: Fix summary percentage when processing files
Dongsheng Yang (6):
perf kvm: Introduce option -v for perf kvm command.
perf kvm: Fix bug in 'stat report'
perf archive: Remove duplicated 'runs' in man page
perf annotate: Fix typo
perf kvm: Move code to generate filename for perf-kvm to function.
perf kvm: Make perf kvm diff support --guestmount.
Jean Pihet (2):
perf tools: Add per-feature check flags
perf unwinding: Use the per-feature check flags
Jiri Olsa (2):
perf report: Add --header/--header-only options
perf script: Add --header/--header-only options
Namhyung Kim (5):
tools lib traceevent: Get rid of malloc_or_die() in pevent_filter_alloc()
tools lib traceevent: Get rid of malloc_or_die() in add_event()
tools lib traceevent: Get rid of die() in create_arg_item()
tools lib traceevent: Get rid of malloc_or_die() in pevent_filter_add_filter_str()
tools lib traceevent: Get rid of die() in pevent_filter_clear_trivial()
Stephane Eranian (1):
perf symbols: Fix bug in usage of the basename() function
Steven Rostedt (1):
tools lib traceevent: Report better error message on bad function args
tools/lib/traceevent/event-parse.c | 28 +++++---
tools/lib/traceevent/event-parse.h | 2 +-
tools/lib/traceevent/parse-filter.c | 57 +++++++++++----
tools/perf/Documentation/perf-archive.txt | 6 +-
tools/perf/Documentation/perf-kvm.txt | 7 +-
tools/perf/Documentation/perf-report.txt | 9 +++
tools/perf/Documentation/perf-script.txt | 8 ++-
tools/perf/builtin-annotate.c | 2 +-
tools/perf/builtin-diff.c | 3 +-
tools/perf/builtin-kvm.c | 11 ++-
tools/perf/builtin-record.c | 2 +-
tools/perf/builtin-report.c | 22 +++++-
tools/perf/builtin-script.c | 23 +++++-
tools/perf/builtin-trace.c | 32 ++++++++-
tools/perf/config/Makefile | 52 ++++++++------
tools/perf/config/feature-checks/Makefile | 8 +--
tools/perf/util/annotate.c | 2 +-
tools/perf/util/build-id.c | 2 +-
tools/perf/util/build-id.h | 2 +-
tools/perf/util/dso.c | 112 ++++++++++++++++++++----------
tools/perf/util/dso.h | 16 ++---
tools/perf/util/evlist.c | 10 +--
tools/perf/util/header.c | 6 +-
tools/perf/util/machine.c | 6 +-
tools/perf/util/map.c | 17 +++++
tools/perf/util/map.h | 2 +
tools/perf/util/probe-event.c | 2 +-
tools/perf/util/session.c | 15 +++-
tools/perf/util/session.h | 1 +
tools/perf/util/srcline.c | 2 +-
tools/perf/util/symbol.c | 38 ++++------
tools/perf/util/symbol.h | 3 +-
tools/perf/util/util.c | 14 ++++
tools/perf/util/util.h | 14 ++++
tools/perf/util/vdso.c | 2 +-
35 files changed, 375 insertions(+), 163 deletions(-)
From: Radha Mohan Chintakuntla <rchintakuntla(a)cavium.com>
This patch series provides an implementation of supporting 48-bit
Physical Addresses for ARMv8 platforms. It is the maximum width that
any ARMv8 based processor can support.
The implementation extends the existing support of 40-bit PA. The kernel
and user space will now be able to access 128TB each. With 4KB page size,
Linux will now use 4 levels of page tables by making use of
'pud'. And with 64KB page size, Linux will use 3 levels of page
tables.
The code has been tested with LTP.
Radha Mohan Chintakuntla (2):
arm64: Add support for 48-bit Physical Addresses
arm64: Add 48-bit PA support for 64KB page size
arch/arm64/include/asm/memory.h | 6 +--
arch/arm64/include/asm/page.h | 4 +-
arch/arm64/include/asm/pgalloc.h | 20 ++++++-
arch/arm64/include/asm/pgtable-3level-hwdef.h | 34 ++++++++++++
arch/arm64/include/asm/pgtable-4level-hwdef.h | 57 ++++++++++++++++++++
arch/arm64/include/asm/pgtable-4level-types.h | 71 +++++++++++++++++++++++++
arch/arm64/include/asm/pgtable-hwdef.h | 9 ++--
arch/arm64/include/asm/pgtable.h | 50 +++++++++++++++---
arch/arm64/include/asm/tlb.h | 2 -
arch/arm64/kernel/head.S | 55 +++++++++++++++++--
arch/arm64/kernel/traps.c | 7 +++
arch/arm64/mm/proc.S | 2 +-
12 files changed, 289 insertions(+), 28 deletions(-)
create mode 100644 arch/arm64/include/asm/pgtable-4level-hwdef.h
create mode 100644 arch/arm64/include/asm/pgtable-4level-types.h
The Power State and Coordination Interface (PSCI) specification defines
SYSTEM_OFF and SYSTEM_RESET functions for system poweroff and reboot.
This patchset adds emulation of PSCI SYSTEM_OFF and SYSTEM_RESET functions
in KVM ARM/ARM64 by forwarding them to user space (QEMU or KVMTOOL) using
KVM_EXIT_SHUTDOWN and KVM_EXIT_RESET exit reasons.
To try this patch from the guest kernel, we will need PSCI-based restart and
poweroff support in the guest kernel for both ARM and ARM64.
Rob Herring has already submitted patches for PSCI-based restart and
poweroff in the ARM kernel, but these are not merged yet due to unstable device
tree bindings of kernel PSCI support. We will be having similar patches
for PSCI-based restart and poweroff in the ARM64 kernel.
(Refer http://www.spinics.net/lists/arm-kernel/msg262217.html)
(Refer http://www.spinics.net/lists/devicetree/msg05348.html)
Anup Patel (2):
KVM: Add KVM_EXIT_RESET to user space API header
ARM/ARM64: KVM: Forward PSCI SYSTEM_OFF and SYSTEM_RESET to user
space
arch/arm/include/asm/kvm_psci.h | 2 +-
arch/arm/include/uapi/asm/kvm.h | 2 ++
arch/arm/kvm/handle_exit.c | 7 ++++++-
arch/arm/kvm/psci.c | 38 +++++++++++++++++++++++++++++--------
arch/arm64/include/asm/kvm_psci.h | 2 +-
arch/arm64/include/uapi/asm/kvm.h | 2 ++
arch/arm64/kvm/handle_exit.c | 10 ++++++----
include/uapi/linux/kvm.h | 1 +
8 files changed, 49 insertions(+), 15 deletions(-)
--
1.7.9.5