In order to allow better integration between the cpuidle framework and the scheduler, reducing the distance between these two sub-components will facilitate this integration by moving part of the cpuidle code in the idle task file and, because idle.c is in the sched directory, we have access to the scheduler's private structures.
This patch splits the cpuidle_idle_call main entry function into 3 calls to a newly added API: 1. select the idle state 2. enter the idle state 3. reflect the idle state
The cpuidle_idle_call calls these three functions to implement the main idle entry function.
Signed-off-by: Daniel Lezcano daniel.lezcano@linaro.org Acked-by: Nicolas Pitre nico@linaro.org ---
ChangeLog:
V4: * rebased on top of tip/master V3: * moved broadcast timer outside of cpuidle_enter() as suggested by Preeti U Murthy : https://lkml.org/lkml/2014/2/24/601 V2: * splitted cpuidle_select error check into 'cpuidle_enabled' function
--- drivers/cpuidle/cpuidle.c | 96 +++++++++++++++++++++++++++++++++++----------- include/linux/cpuidle.h | 19 +++++++++ 2 files changed, 94 insertions(+), 21 deletions(-)
Index: cpuidle-next/drivers/cpuidle/cpuidle.c =================================================================== --- cpuidle-next.orig/drivers/cpuidle/cpuidle.c +++ cpuidle-next/drivers/cpuidle/cpuidle.c @@ -65,6 +65,26 @@ int cpuidle_play_dead(void) }
/** + * cpuidle_enabled - check if the cpuidle framework is ready + * @dev: cpuidle device for this cpu + * @drv: cpuidle driver for this cpu + * + * Return 0 on success, otherwise: + * -NODEV : the cpuidle framework is not available + * -EBUSY : the cpuidle framework is not initialized + */ +int cpuidle_enabled(struct cpuidle_driver *drv, struct cpuidle_device *dev) +{ + if (off || !initialized) + return -ENODEV; + + if (!drv || !dev || !dev->enabled) + return -EBUSY; + + return 0; +} + +/** * cpuidle_enter_state - enter the state and update stats * @dev: cpuidle device for this cpu * @drv: cpuidle driver for this cpu @@ -108,6 +128,51 @@ int cpuidle_enter_state(struct cpuidle_d }
/** + * cpuidle_select - ask the cpuidle framework to choose an idle state + * + * @drv: the cpuidle driver + * @dev: the cpuidle device + * + * Returns the index of the idle state. + */ +int cpuidle_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) +{ + return cpuidle_curr_governor->select(drv, dev); +} + +/** + * cpuidle_enter - enter into the specified idle state + * + * @drv: the cpuidle driver tied with the cpu + * @dev: the cpuidle device + * @index: the index in the idle state table + * + * Returns the index in the idle state, < 0 in case of error. + * The error code depends on the backend driver + */ +int cpuidle_enter(struct cpuidle_driver *drv, struct cpuidle_device *dev, + int index) +{ + if (cpuidle_state_is_coupled(dev, drv, index)) + return cpuidle_enter_state_coupled(dev, drv, index); + return cpuidle_enter_state(dev, drv, index); +} + +/** + * cpuidle_reflect - tell the underlying governor what was the state + * we were in + * + * @dev : the cpuidle device + * @index: the index in the idle state table + * + */ +void cpuidle_reflect(struct cpuidle_device *dev, int index) +{ + if (cpuidle_curr_governor->reflect) + cpuidle_curr_governor->reflect(dev, index); +} + +/** * cpuidle_idle_call - the main idle loop * * NOTE: no locks or semaphores should be used here @@ -116,26 +181,21 @@ int cpuidle_enter_state(struct cpuidle_d int cpuidle_idle_call(void) { struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices); - struct cpuidle_driver *drv; - int next_state, entered_state; + struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev); + int next_state, entered_state, ret; bool broadcast;
- if (off || !initialized) - return -ENODEV; - - /* check if the device is ready */ - if (!dev || !dev->enabled) - return -EBUSY; - - drv = cpuidle_get_cpu_driver(dev); + ret = cpuidle_enabled(drv, dev); + if (ret < 0) + return ret;
/* ask the governor for the next state */ - next_state = cpuidle_curr_governor->select(drv, dev); + next_state = cpuidle_select(drv, dev); + if (need_resched()) { dev->last_residency = 0; /* give the governor an opportunity to reflect on the outcome */ - if (cpuidle_curr_governor->reflect) - cpuidle_curr_governor->reflect(dev, next_state); + cpuidle_reflect(dev, next_state); local_irq_enable(); return 0; } @@ -146,14 +206,9 @@ int cpuidle_idle_call(void) clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu)) return -EBUSY;
- trace_cpu_idle_rcuidle(next_state, dev->cpu);
- if (cpuidle_state_is_coupled(dev, drv, next_state)) - entered_state = cpuidle_enter_state_coupled(dev, drv, - next_state); - else - entered_state = cpuidle_enter_state(dev, drv, next_state); + entered_state = cpuidle_enter(drv, dev, next_state);
trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, dev->cpu);
@@ -161,8 +216,7 @@ int cpuidle_idle_call(void) clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &dev->cpu);
/* give the governor an opportunity to reflect on the outcome */ - if (cpuidle_curr_governor->reflect) - cpuidle_curr_governor->reflect(dev, entered_state); + cpuidle_reflect(dev, entered_state);
return 0; } Index: cpuidle-next/include/linux/cpuidle.h =================================================================== --- cpuidle-next.orig/include/linux/cpuidle.h +++ cpuidle-next/include/linux/cpuidle.h @@ -119,6 +119,15 @@ struct cpuidle_driver {
#ifdef CONFIG_CPU_IDLE extern void disable_cpuidle(void); + +extern int cpuidle_enabled(struct cpuidle_driver *drv, + struct cpuidle_device *dev); +extern int cpuidle_select(struct cpuidle_driver *drv, + struct cpuidle_device *dev); +extern int cpuidle_enter(struct cpuidle_driver *drv, + struct cpuidle_device *dev, int index); +extern void cpuidle_reflect(struct cpuidle_device *dev, int index); + extern int cpuidle_idle_call(void); extern int cpuidle_register_driver(struct cpuidle_driver *drv); extern struct cpuidle_driver *cpuidle_get_driver(void); @@ -141,6 +150,16 @@ extern int cpuidle_play_dead(void); extern struct cpuidle_driver *cpuidle_get_cpu_driver(struct cpuidle_device *dev); #else static inline void disable_cpuidle(void) { } +static inline int cpuidle_enabled(struct cpuidle_driver *drv, + struct cpuidle_device *dev) +{return -ENODEV; } +static inline int cpuidle_select(struct cpuidle_driver *drv, + struct cpuidle_device *dev) +{return -ENODEV; } +static inline int cpuidle_enter(struct cpuidle_driver *drv, + struct cpuidle_device *dev, int index) +{return -ENODEV; } +static inline void cpuidle_reflect(struct cpuidle_device *dev, int index) { } static inline int cpuidle_idle_call(void) { return -ENODEV; } static inline int cpuidle_register_driver(struct cpuidle_driver *drv) {return -ENODEV; }
The cpuidle_idle_call does nothing more than calling the three individuals function and is no longer used by any arch specific code but only in the cpuidle framework code.
We can move this function into the idle task code to ensure better proximity to the scheduler code.
Signed-off-by: Daniel Lezcano daniel.lezcano@linaro.org Acked-by: Nicolas Pitre nicolas.pitre@linaro.org --- Changelog:
V4: * rebased on the top of tip/master
--- drivers/cpuidle/cpuidle.c | 49 ---------------------------------------- include/linux/cpuidle.h | 2 - kernel/sched/idle.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 56 insertions(+), 51 deletions(-)
Index: cpuidle-next/drivers/cpuidle/cpuidle.c =================================================================== --- cpuidle-next.orig/drivers/cpuidle/cpuidle.c +++ cpuidle-next/drivers/cpuidle/cpuidle.c @@ -173,55 +173,6 @@ void cpuidle_reflect(struct cpuidle_devi }
/** - * cpuidle_idle_call - the main idle loop - * - * NOTE: no locks or semaphores should be used here - * return non-zero on failure - */ -int cpuidle_idle_call(void) -{ - struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices); - struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev); - int next_state, entered_state, ret; - bool broadcast; - - ret = cpuidle_enabled(drv, dev); - if (ret < 0) - return ret; - - /* ask the governor for the next state */ - next_state = cpuidle_select(drv, dev); - - if (need_resched()) { - dev->last_residency = 0; - /* give the governor an opportunity to reflect on the outcome */ - cpuidle_reflect(dev, next_state); - local_irq_enable(); - return 0; - } - - broadcast = !!(drv->states[next_state].flags & CPUIDLE_FLAG_TIMER_STOP); - - if (broadcast && - clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu)) - return -EBUSY; - - trace_cpu_idle_rcuidle(next_state, dev->cpu); - - entered_state = cpuidle_enter(drv, dev, next_state); - - trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, dev->cpu); - - if (broadcast) - clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &dev->cpu); - - /* give the governor an opportunity to reflect on the outcome */ - cpuidle_reflect(dev, entered_state); - - return 0; -} - -/** * cpuidle_install_idle_handler - installs the cpuidle idle loop handler */ void cpuidle_install_idle_handler(void) Index: cpuidle-next/include/linux/cpuidle.h =================================================================== --- cpuidle-next.orig/include/linux/cpuidle.h +++ cpuidle-next/include/linux/cpuidle.h @@ -128,7 +128,6 @@ extern int cpuidle_enter(struct cpuidle_ struct cpuidle_device *dev, int index); extern void cpuidle_reflect(struct cpuidle_device *dev, int index);
-extern int cpuidle_idle_call(void); extern int cpuidle_register_driver(struct cpuidle_driver *drv); extern struct cpuidle_driver *cpuidle_get_driver(void); extern struct cpuidle_driver *cpuidle_driver_ref(void); @@ -160,7 +159,6 @@ static inline int cpuidle_enter(struct c struct cpuidle_device *dev, int index) {return -ENODEV; } static inline void cpuidle_reflect(struct cpuidle_device *dev, int index) { } -static inline int cpuidle_idle_call(void) { return -ENODEV; } static inline int cpuidle_register_driver(struct cpuidle_driver *drv) {return -ENODEV; } static inline struct cpuidle_driver *cpuidle_get_driver(void) {return NULL; } Index: cpuidle-next/kernel/sched/idle.c =================================================================== --- cpuidle-next.orig/kernel/sched/idle.c +++ cpuidle-next/kernel/sched/idle.c @@ -63,6 +63,62 @@ void __weak arch_cpu_idle(void) local_irq_enable(); }
+#ifdef CONFIG_CPU_IDLE +/** + * cpuidle_idle_call - the main idle function + * + * NOTE: no locks or semaphores should be used here + * return non-zero on failure + */ +static int cpuidle_idle_call(void) +{ + struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices); + struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev); + int next_state, entered_state, ret; + bool broadcast; + + ret = cpuidle_enabled(drv, dev); + if (ret < 0) + return ret; + + /* ask the governor for the next state */ + next_state = cpuidle_select(drv, dev); + + if (need_resched()) { + dev->last_residency = 0; + /* give the governor an opportunity to reflect on the outcome */ + cpuidle_reflect(dev, next_state); + local_irq_enable(); + return 0; + } + + broadcast = !!(drv->states[next_state].flags & CPUIDLE_FLAG_TIMER_STOP); + + if (broadcast && + clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu)) + return -EBUSY; + + trace_cpu_idle_rcuidle(next_state, dev->cpu); + + entered_state = cpuidle_enter(drv, dev, next_state); + + trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, dev->cpu); + + if (broadcast) + clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &dev->cpu); + + /* give the governor an opportunity to reflect on the outcome */ + cpuidle_reflect(dev, entered_state); + + return 0; +} +#else +static inline int cpuidle_idle_call(void) +{ + return -ENODEV; +} +#endif + /* * Generic idle loop implementation */
Now that we have the main cpuidle function in idle.c, move some code from the idle mainloop to this function for the sake of clarity.
That removes if then else indentation difficult to follow when looking at the code. This patch does not change the current behavior.
Signed-off-by: Daniel Lezcano daniel.lezcano@linaro.org Acked-by: Nicolas Pitre nico@linaro.org ---
Changelog:
V4: * no changes V3: * no changes V2: * fixed type in patch's description
--- include/linux/cpuidle.h | 2 ++ kernel/sched/idle.c | 33 +++++++++++++++------------------ 2 files changed, 17 insertions(+), 18 deletions(-)
Index: cpuidle-next/include/linux/cpuidle.h =================================================================== --- cpuidle-next.orig/include/linux/cpuidle.h +++ cpuidle-next/include/linux/cpuidle.h @@ -180,6 +180,8 @@ static inline int cpuidle_enable_device( {return -ENODEV; } static inline void cpuidle_disable_device(struct cpuidle_device *dev) { } static inline int cpuidle_play_dead(void) {return -ENODEV; } +static inline struct cpuidle_driver *cpuidle_get_cpu_driver( + struct cpuidle_device *dev) {return NULL; } #endif
#ifdef CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED Index: cpuidle-next/kernel/sched/idle.c =================================================================== --- cpuidle-next.orig/kernel/sched/idle.c +++ cpuidle-next/kernel/sched/idle.c @@ -63,7 +63,6 @@ void __weak arch_cpu_idle(void) local_irq_enable(); }
-#ifdef CONFIG_CPU_IDLE /** * cpuidle_idle_call - the main idle function * @@ -77,9 +76,14 @@ static int cpuidle_idle_call(void) int next_state, entered_state, ret; bool broadcast;
+ stop_critical_timings(); + rcu_idle_enter(); + ret = cpuidle_enabled(drv, dev); - if (ret < 0) - return ret; + if (ret < 0) { + arch_cpu_idle(); + goto out; + }
/* ask the governor for the next state */ next_state = cpuidle_select(drv, dev); @@ -89,7 +93,7 @@ static int cpuidle_idle_call(void) /* give the governor an opportunity to reflect on the outcome */ cpuidle_reflect(dev, next_state); local_irq_enable(); - return 0; + goto out; }
broadcast = !!(drv->states[next_state].flags & CPUIDLE_FLAG_TIMER_STOP); @@ -109,15 +113,15 @@ static int cpuidle_idle_call(void)
/* give the governor an opportunity to reflect on the outcome */ cpuidle_reflect(dev, entered_state); +out: + if (WARN_ON_ONCE(irqs_disabled())) + local_irq_enable(); + + rcu_idle_exit(); + start_critical_timings();
return 0; } -#else -static inline int cpuidle_idle_call(void) -{ - return -ENODEV; -} -#endif
/* * Generic idle loop implementation @@ -150,14 +154,7 @@ static void cpu_idle_loop(void) cpu_idle_poll(); } else { if (!current_clr_polling_and_test()) { - stop_critical_timings(); - rcu_idle_enter(); - if (cpuidle_idle_call()) - arch_cpu_idle(); - if (WARN_ON_ONCE(irqs_disabled())) - local_irq_enable(); - rcu_idle_exit(); - start_critical_timings(); + cpuidle_idle_call(); } else { local_irq_enable(); }
This patch moves the condition before entering idle into the cpuidle main function located in idle.c. That simplify the idle mainloop functions and increase the readibility of the conditions to enter truly idle.
This patch is code reorganization and does not change the behavior of the function.
Signed-off-by: Daniel Lezcano daniel.lezcano@linaro.org Cc: Peter Zijlstra peterz@infradead.org ---
V4: * took into account clockevents_notify(ENTER) could fail * removed an extra current_clr_polling_and_test added in V3 V3: * reorganized cpuidle_idle_call function as Suggested by Peter Zijlstra https://lkml.org/lkml/2014/2/24/492
--- kernel/sched/idle.c | 84 +++++++++++++++++++++++++++++----------------------- 1 file changed, 47 insertions(+), 37 deletions(-)
Index: cpuidle-next/kernel/sched/idle.c =================================================================== --- cpuidle-next.orig/kernel/sched/idle.c +++ cpuidle-next/kernel/sched/idle.c @@ -76,44 +76,59 @@ static int cpuidle_idle_call(void) int next_state, entered_state, ret; bool broadcast;
+ if (current_clr_polling_and_test()) { + local_irq_enable(); + __current_set_polling(); + return 0; + } + stop_critical_timings(); rcu_idle_enter();
ret = cpuidle_enabled(drv, dev); - if (ret < 0) { - arch_cpu_idle(); - goto out; - }
- /* ask the governor for the next state */ - next_state = cpuidle_select(drv, dev); + if (!ret) { + /* ask the governor for the next state */ + next_state = cpuidle_select(drv, dev); + + if (current_clr_polling_and_test()) { + dev->last_residency = 0; + entered_state = next_state; + local_irq_enable(); + } else { + broadcast = !!(drv->states[next_state].flags & + CPUIDLE_FLAG_TIMER_STOP); + + if (broadcast) + ret = clockevents_notify( + CLOCK_EVT_NOTIFY_BROADCAST_ENTER, + &dev->cpu); + + if (!ret) { + trace_cpu_idle_rcuidle(next_state, dev->cpu); + + entered_state = cpuidle_enter(drv, dev, + next_state); + + trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, + dev->cpu); + + if (broadcast) + clockevents_notify( + CLOCK_EVT_NOTIFY_BROADCAST_EXIT, + &dev->cpu);
- if (need_resched()) { - dev->last_residency = 0; - /* give the governor an opportunity to reflect on the outcome */ - cpuidle_reflect(dev, next_state); - local_irq_enable(); - goto out; + /* give the governor an opportunity to reflect on the outcome */ + cpuidle_reflect(dev, entered_state); + } + } }
- broadcast = !!(drv->states[next_state].flags & CPUIDLE_FLAG_TIMER_STOP); - - if (broadcast && - clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu)) - return -EBUSY; - - trace_cpu_idle_rcuidle(next_state, dev->cpu); - - entered_state = cpuidle_enter(drv, dev, next_state); - - trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, dev->cpu); + if (ret) + arch_cpu_idle();
- if (broadcast) - clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &dev->cpu); + __current_set_polling();
- /* give the governor an opportunity to reflect on the outcome */ - cpuidle_reflect(dev, entered_state); -out: if (WARN_ON_ONCE(irqs_disabled())) local_irq_enable();
@@ -150,16 +165,11 @@ static void cpu_idle_loop(void) * know that the IPI is going to arrive right * away */ - if (cpu_idle_force_poll || tick_check_broadcast_expired()) { + if (cpu_idle_force_poll || tick_check_broadcast_expired()) cpu_idle_poll(); - } else { - if (!current_clr_polling_and_test()) { - cpuidle_idle_call(); - } else { - local_irq_enable(); - } - __current_set_polling(); - } + else + cpuidle_idle_call(); + arch_cpu_idle_exit(); }
The idle main function is a complex and a critical function. Added more comments to the code.
Signed-off-by: Daniel Lezcano daniel.lezcano@linaro.org Acked-by: Nicolas Pitre nico@linaro.org --- Changelog:
V4: * updated comments with new code V3: * no changes V2: * fixed typo in comment --- kernel/sched/idle.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 57 insertions(+), 2 deletions(-)
Index: cpuidle-next/kernel/sched/idle.c =================================================================== --- cpuidle-next.orig/kernel/sched/idle.c +++ cpuidle-next/kernel/sched/idle.c @@ -76,21 +76,49 @@ static int cpuidle_idle_call(void) int next_state, entered_state, ret; bool broadcast;
+ /* + * Check if the idle task must be rescheduled. If it is the + * case, exit the function after re-enabling the local irq and + * set again the polling flag + */ if (current_clr_polling_and_test()) { local_irq_enable(); __current_set_polling(); return 0; }
+ /* + * During the idle period, stop measuring the disabled irqs + * critical sections latencies + */ stop_critical_timings(); + + /* + * Tell the RCU framework we are entering an idle section, + * so no more rcu read side critical sections and one more + * step to the grace period + */ rcu_idle_enter();
+ /* + * Check if the cpuidle framework is ready, otherwise fallback + * to the default arch specific idle method + */ ret = cpuidle_enabled(drv, dev);
if (!ret) { - /* ask the governor for the next state */ + /* + * Ask the governor to choose an idle state it thinks + * it is convenient to go to. There is *always* a + * convenient idle state + */ next_state = cpuidle_select(drv, dev);
+ /* + * The idle task must be scheduled, it is pointless to + * go to idle, just update no idle residency and get + * out of this function + */ if (current_clr_polling_and_test()) { dev->last_residency = 0; entered_state = next_state; @@ -100,6 +128,14 @@ static int cpuidle_idle_call(void) CPUIDLE_FLAG_TIMER_STOP);
if (broadcast) + /* + * Tell the time framework to switch + * to a broadcast timer because our + * local timer will be shutdown. If a + * local timer is used from another + * cpu as a broadcast timer, this call + * may fail if it is not available + */ ret = clockevents_notify( CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu); @@ -107,6 +143,14 @@ static int cpuidle_idle_call(void) if (!ret) { trace_cpu_idle_rcuidle(next_state, dev->cpu);
+ /* + * Enter the idle state previously + * returned by the governor + * decision. This function will block + * until an interrupt occurs and will + * take care of re-enabling the local + * interrupts + */ entered_state = cpuidle_enter(drv, dev, next_state);
@@ -118,17 +162,28 @@ static int cpuidle_idle_call(void) CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &dev->cpu);
- /* give the governor an opportunity to reflect on the outcome */ + /* + * Give the governor an opportunity to reflect on the + * outcome + */ cpuidle_reflect(dev, entered_state); } } }
+ /* + * We can't use the cpuidle framework, let's use the default + * idle routine + */ if (ret) arch_cpu_idle();
__current_set_polling();
+ /* + * It is up to the idle functions to enable back the local + * interrupt + */ if (WARN_ON_ONCE(irqs_disabled())) local_irq_enable();
linaro-kernel@lists.linaro.org