In order to allow better integration between the cpuidle framework and the
scheduler, reducing the distance between these two sub-components will
facilitate this integration by moving part of the cpuidle code in the idle
task file and, because idle.c is in the sched directory, we have access to
the scheduler's private structures.
This patch splits the cpuidle_idle_call main entry function into 3 calls
to a newly added API:
1. select the idle state
2. enter the idle state
3. reflect the idle state
The cpuidle_idle_call calls these three functions to implement the main
idle entry function.
Signed-off-by: Daniel Lezcano <daniel.lezcano(a)linaro.org>
Acked-by: Nicolas Pitre <nico(a)linaro.org>
---
ChangeLog:
V4:
* rebased on top of tip/master
V3:
* moved broadcast timer outside of cpuidle_enter() as suggested by
Preeti U Murthy : https://lkml.org/lkml/2014/2/24/601
V2:
* splitted cpuidle_select error check into 'cpuidle_enabled' function
---
drivers/cpuidle/cpuidle.c | 96 +++++++++++++++++++++++++++++++++++-----------
include/linux/cpuidle.h | 19 +++++++++
2 files changed, 94 insertions(+), 21 deletions(-)
Index: cpuidle-next/drivers/cpuidle/cpuidle.c
===================================================================
--- cpuidle-next.orig/drivers/cpuidle/cpuidle.c
+++ cpuidle-next/drivers/cpuidle/cpuidle.c
@@ -65,6 +65,26 @@ int cpuidle_play_dead(void)
}
/**
+ * cpuidle_enabled - check if the cpuidle framework is ready
+ * @dev: cpuidle device for this cpu
+ * @drv: cpuidle driver for this cpu
+ *
+ * Return 0 on success, otherwise:
+ * -NODEV : the cpuidle framework is not available
+ * -EBUSY : the cpuidle framework is not initialized
+ */
+int cpuidle_enabled(struct cpuidle_driver *drv, struct cpuidle_device *dev)
+{
+ if (off || !initialized)
+ return -ENODEV;
+
+ if (!drv || !dev || !dev->enabled)
+ return -EBUSY;
+
+ return 0;
+}
+
+/**
* cpuidle_enter_state - enter the state and update stats
* @dev: cpuidle device for this cpu
* @drv: cpuidle driver for this cpu
@@ -108,6 +128,51 @@ int cpuidle_enter_state(struct cpuidle_d
}
/**
+ * cpuidle_select - ask the cpuidle framework to choose an idle state
+ *
+ * @drv: the cpuidle driver
+ * @dev: the cpuidle device
+ *
+ * Returns the index of the idle state.
+ */
+int cpuidle_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
+{
+ return cpuidle_curr_governor->select(drv, dev);
+}
+
+/**
+ * cpuidle_enter - enter into the specified idle state
+ *
+ * @drv: the cpuidle driver tied with the cpu
+ * @dev: the cpuidle device
+ * @index: the index in the idle state table
+ *
+ * Returns the index in the idle state, < 0 in case of error.
+ * The error code depends on the backend driver
+ */
+int cpuidle_enter(struct cpuidle_driver *drv, struct cpuidle_device *dev,
+ int index)
+{
+ if (cpuidle_state_is_coupled(dev, drv, index))
+ return cpuidle_enter_state_coupled(dev, drv, index);
+ return cpuidle_enter_state(dev, drv, index);
+}
+
+/**
+ * cpuidle_reflect - tell the underlying governor what was the state
+ * we were in
+ *
+ * @dev : the cpuidle device
+ * @index: the index in the idle state table
+ *
+ */
+void cpuidle_reflect(struct cpuidle_device *dev, int index)
+{
+ if (cpuidle_curr_governor->reflect)
+ cpuidle_curr_governor->reflect(dev, index);
+}
+
+/**
* cpuidle_idle_call - the main idle loop
*
* NOTE: no locks or semaphores should be used here
@@ -116,26 +181,21 @@ int cpuidle_enter_state(struct cpuidle_d
int cpuidle_idle_call(void)
{
struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices);
- struct cpuidle_driver *drv;
- int next_state, entered_state;
+ struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
+ int next_state, entered_state, ret;
bool broadcast;
- if (off || !initialized)
- return -ENODEV;
-
- /* check if the device is ready */
- if (!dev || !dev->enabled)
- return -EBUSY;
-
- drv = cpuidle_get_cpu_driver(dev);
+ ret = cpuidle_enabled(drv, dev);
+ if (ret < 0)
+ return ret;
/* ask the governor for the next state */
- next_state = cpuidle_curr_governor->select(drv, dev);
+ next_state = cpuidle_select(drv, dev);
+
if (need_resched()) {
dev->last_residency = 0;
/* give the governor an opportunity to reflect on the outcome */
- if (cpuidle_curr_governor->reflect)
- cpuidle_curr_governor->reflect(dev, next_state);
+ cpuidle_reflect(dev, next_state);
local_irq_enable();
return 0;
}
@@ -146,14 +206,9 @@ int cpuidle_idle_call(void)
clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu))
return -EBUSY;
-
trace_cpu_idle_rcuidle(next_state, dev->cpu);
- if (cpuidle_state_is_coupled(dev, drv, next_state))
- entered_state = cpuidle_enter_state_coupled(dev, drv,
- next_state);
- else
- entered_state = cpuidle_enter_state(dev, drv, next_state);
+ entered_state = cpuidle_enter(drv, dev, next_state);
trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, dev->cpu);
@@ -161,8 +216,7 @@ int cpuidle_idle_call(void)
clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &dev->cpu);
/* give the governor an opportunity to reflect on the outcome */
- if (cpuidle_curr_governor->reflect)
- cpuidle_curr_governor->reflect(dev, entered_state);
+ cpuidle_reflect(dev, entered_state);
return 0;
}
Index: cpuidle-next/include/linux/cpuidle.h
===================================================================
--- cpuidle-next.orig/include/linux/cpuidle.h
+++ cpuidle-next/include/linux/cpuidle.h
@@ -119,6 +119,15 @@ struct cpuidle_driver {
#ifdef CONFIG_CPU_IDLE
extern void disable_cpuidle(void);
+
+extern int cpuidle_enabled(struct cpuidle_driver *drv,
+ struct cpuidle_device *dev);
+extern int cpuidle_select(struct cpuidle_driver *drv,
+ struct cpuidle_device *dev);
+extern int cpuidle_enter(struct cpuidle_driver *drv,
+ struct cpuidle_device *dev, int index);
+extern void cpuidle_reflect(struct cpuidle_device *dev, int index);
+
extern int cpuidle_idle_call(void);
extern int cpuidle_register_driver(struct cpuidle_driver *drv);
extern struct cpuidle_driver *cpuidle_get_driver(void);
@@ -141,6 +150,16 @@ extern int cpuidle_play_dead(void);
extern struct cpuidle_driver *cpuidle_get_cpu_driver(struct cpuidle_device *dev);
#else
static inline void disable_cpuidle(void) { }
+static inline int cpuidle_enabled(struct cpuidle_driver *drv,
+ struct cpuidle_device *dev)
+{return -ENODEV; }
+static inline int cpuidle_select(struct cpuidle_driver *drv,
+ struct cpuidle_device *dev)
+{return -ENODEV; }
+static inline int cpuidle_enter(struct cpuidle_driver *drv,
+ struct cpuidle_device *dev, int index)
+{return -ENODEV; }
+static inline void cpuidle_reflect(struct cpuidle_device *dev, int index) { }
static inline int cpuidle_idle_call(void) { return -ENODEV; }
static inline int cpuidle_register_driver(struct cpuidle_driver *drv)
{return -ENODEV; }
This patchset creates/calls cpufreq suspend/resume callbacks from dpm_{suspend|resume}()
for handling suspend/resume of cpufreq governors and core.
There are multiple problems that are fixed by this patch:
- Nishanth Menon (TI) found an interesting problem on his platform, OMAP. His board
wasn't working well with suspend/resume as calls for removing non-boot CPUs
was turning out into a call to drivers ->target() which then tries to play
with regulators. But regulators and their I2C bus were already suspended and
this resulted in a failure. Many platforms have such problems, samsung, tegra,
etc.. They solved it with driver specific PM notifiers where they used to
disable their driver's ->target() routine.
- Lan Tianyu (Intel) & Jinhyuk Choi (Broadcom) found an issue where tunables
configuration for clusters/sockets with non-boot CPUs was getting lost after
suspend/resume, as we were notifying governors with CPUFREQ_GOV_POLICY_EXIT on
removal of the last cpu for that policy and so deallocating memory for
tunables. This is fixed by this patch as we don't allow any operation on
governors after device suspend and before device resume now.
This is already tested by few people and so incorporating their Tested-by as
well.
I have tested this again on latest stuff on my thinkpad for several
suspend/resume cycles.
V6->V7:
- Fixed crash reported by S.Warren on systems without a cpufreq driver.
- Moved some function doc-comments to patch 1 from a later patch.
For-v3.15
Viresh Kumar (7):
cpufreq: suspend governors on system suspend/hibernate
cpufreq: suspend governors from dpm_{suspend|resume}()
cpufreq: call driver's suspend/resume for each policy
cpufreq: Implement cpufreq_generic_suspend()
cpufreq: exynos: Use cpufreq_generic_suspend()
cpufreq: s5pv210: Use cpufreq_generic_suspend()
cpufreq: Tegra: Use cpufreq_generic_suspend()
drivers/base/power/main.c | 5 ++
drivers/cpufreq/cpufreq.c | 137 +++++++++++++++++++++++---------------
drivers/cpufreq/exynos-cpufreq.c | 96 ++------------------------
drivers/cpufreq/s5pv210-cpufreq.c | 49 +-------------
drivers/cpufreq/tegra-cpufreq.c | 46 ++-----------
include/linux/cpufreq.h | 11 +++
6 files changed, 113 insertions(+), 231 deletions(-)
--
1.7.12.rc2.18.g61b472e
This is v3 of my attempt to add support for a generic pci_host_bridge controller created
from a description passed in the device tree.
Changes from v2:
- Use range->cpu_addr when calling pci_address_to_pio()
- Introduce pci_register_io_range() helper function in order to register
io ranges ahead of their conversion to PIO values. This is needed as no
information is being stored yet regarding the range mapping, making
pci_address_to_pio() fail. Default weak implementation does nothing,
to cover the default weak implementation of pci_address_to_pio() that
expects direct mapping of physical addresses into PIO values (x86 view).
Changes from v1:
- Add patch to fix conversion of IO ranges into IO resources.
- Added a domain_nr member to pci_host_bridge structure, and a new function
to create a root bus in a given domain number. In order to facilitate that
I propose changing the order of initialisation between pci_host_bridge and
it's related bus in pci_create_root_bus() as sort of a rever of 7b5436635800.
This is done in patch 1/4 and 2/4.
- Added a simple allocator of domain numbers in drivers/pci/host-bridge.c. The
code will first try to get a domain id from of_alias_get_id(..., "pci-domain")
and if that fails assign the next unallocated domain id.
- Changed the name of the function that creates the generic host bridge from
pci_host_bridge_of_init to of_create_pci_host_bridge and exported as GPL symbol.
v1 thread here: https://lkml.org/lkml/2014/2/3/380
The following is an edit of the original blurb:
Following the discussion started here [1], I now have a proposal for tackling
generic support for host bridges described via device tree. It is an initial
stab at it, to try to get feedback and suggestions, but it is functional enough
that I have PCI Express for arm64 working on an FPGA using the patch that I am
also publishing that adds support for PCI for that platform.
Looking at the existing architectures that fit the requirements (use of device
tree and PCI) yields the powerpc and microblaze as generic enough to make them
candidates for conversion. I have a tentative patch for microblaze that I can
only compile test it, unfortunately using qemu-microblaze leads to an early
crash in the kernel.
As Bjorn has mentioned in the previous discussion, the idea is to add to
struct pci_host_bridge enough data to be able to reduce the size or remove the
architecture specific pci_controller structure. arm64 support actually manages
to get rid of all the architecture static data and has no pci_controller structure
defined. For host bridge drivers that means a change of API unless architectures
decide to provide a compatibility layer (comments here please).
In order to initialise a host bridge with the new API, the following example
code is sufficient for a _probe() function:
static int myhostbridge_probe(struct platform_device *pdev)
{
int err;
struct device_node *dev;
struct pci_host_bridge *bridge;
struct myhostbridge_port *pp;
resource_size_t lastbus;
dev = pdev->dev.of_node;
if (!of_device_is_available(dev)) {
pr_warn("%s: disabled\n", dev->full_name);
return -ENODEV;
}
pp = kzalloc(sizeof(struct myhostbridge_port), GFP_KERNEL);
if (!pp)
return -ENOMEM;
bridge = of_create_pci_host_bridge(&pdev->dev, &myhostbridge_ops, pp);
if (!bridge) {
err = -EINVAL;
goto bridge_init_fail;
}
err = myhostbridge_setup(bridge->bus);
if (err)
goto bridge_init_fail;
/* We always enable PCI domains and we keep domain 0 backward
* compatible in /proc for video cards
*/
pci_add_flags(PCI_ENABLE_PROC_DOMAINS | PCI_COMPAT_DOMAIN_0);
pci_add_flags(PCI_REASSIGN_ALL_BUS | PCI_REASSIGN_ALL_RSRC);
lastbus = pci_scan_child_bus(bridge->bus);
pci_bus_update_busn_res_end(bridge->bus, lastbus);
pci_assign_unassigned_bus_resources(bridge->bus);
pci_bus_add_devices(bridge->bus);
return 0;
bridge_init_fail:
kfree(pp);
return err;
}
[1] http://thread.gmane.org/gmane.linux.kernel.pci/25946
Best regards,
Liviu
Liviu Dudau (5):
pci: Introduce pci_register_io_range() helper function.
pci: OF: Fix the conversion of IO ranges into IO resources.
pci: Create pci_host_bridge before its associated bus in pci_create_root_bus.
pci: Introduce a domain number for pci_host_bridge.
pci: Add support for creating a generic host_bridge from device tree
drivers/of/address.c | 39 +++++++++++++
drivers/pci/host-bridge.c | 134 +++++++++++++++++++++++++++++++++++++++++++++
drivers/pci/probe.c | 66 ++++++++++++++--------
include/linux/of_address.h | 14 +----
include/linux/pci.h | 17 ++++++
5 files changed, 236 insertions(+), 34 deletions(-)
--
1.9.0