Hello Amit,
some comments embedded.
On Wed, Jan 18, 2012 at 02:51:07PM +0530, Amit Daniel Kachhap wrote:
Add a sysfs node code to report effective cooling of all cooling devices attached to each trip points of a thermal zone. The cooling data reported will be absolute if the higher temperature trip points are arranged first otherwise the cooling stats is the cumulative effect of the earlier invoked cooling handlers.
The basic assumption is that cooling devices will bring down the temperature in a symmetric manner and those statistics can be stored back and used for further tuning of the system.
Signed-off-by: Amit Daniel Kachhap amit.kachhap@linaro.org
Documentation/thermal/sysfs-api.txt | 10 ++++ drivers/thermal/thermal_sys.c | 96 +++++++++++++++++++++++++++++++++++ include/linux/thermal.h | 8 +++ 3 files changed, 114 insertions(+), 0 deletions(-)
diff --git a/Documentation/thermal/sysfs-api.txt b/Documentation/thermal/sysfs-api.txt index b61e46f..1db9a9d 100644 --- a/Documentation/thermal/sysfs-api.txt +++ b/Documentation/thermal/sysfs-api.txt @@ -209,6 +209,13 @@ passive Valid values: 0 (disabled) or greater than 1000 RW, Optional +trip_stats
- This attribute presents the effective cooling generated from all the
- cooling devices attached to a trip point. The output is a pair of value
- for each trip point. Each pair represents
- (trip index, cooling temperature difference in millidegree Celsius)
- RO, Optional
- Cooling device attributes *
@@ -261,6 +268,9 @@ method, the sys I/F structure will be built like this: |---cdev0_trip_point: 1 /* cdev0 can be used for passive */ |---cdev1: --->/sys/class/thermal/cooling_device3 |---cdev1_trip_point: 2 /* cdev1 can be used for active[0]*/
- |---trip_stats 0 0
1 8000 /*trip 1 causes 8 degree Celsius drop*/
2 3000 /*trip 2 causes 3 degree Celsius drop*/
|cooling_device0: |---type: Processor diff --git a/drivers/thermal/thermal_sys.c b/drivers/thermal/thermal_sys.c index dd9a574..47e7b6e 100644 --- a/drivers/thermal/thermal_sys.c +++ b/drivers/thermal/thermal_sys.c @@ -92,6 +92,64 @@ static void release_idr(struct idr *idr, struct mutex *lock, int id) if (lock) mutex_unlock(lock); } +static void update_cooling_stats(struct thermal_zone_device *tz, long cur_temp) +{
- int count, max_index, cur_interval;
- long trip_temp, max_temp = 0, cool_temp;
- static int last_trip_level = -1;
I got confused here. Are you sure using a static variable here is safe? I suppose this function is called for any thermal_zone_device, which in turns, may have different amount of trips, and different update rate. You may be using last_trip_level as reference for a different tz. Meaning, you would be screwing the stat buffers silently.
If that is the case, I suggest you to move this to your stat structure.
- if (cur_temp >= tz->last_temperature)
return;
- /* find the trip according to last temperature */
- for (count = 0; count < tz->trips; count++) {
tz->ops->get_trip_temp(tz, count, &trip_temp);
if (tz->last_temperature >= trip_temp) {
if (max_temp < trip_temp) {
max_temp = trip_temp;
max_index = count;
}
}
- }
- if (!max_temp) {
last_trip_level = -1;
return;
- }
- cur_interval = tz->stat[max_index].interval_ptr;
- cool_temp = tz->last_temperature - cur_temp;
- if (last_trip_level != max_index) {
if (++cur_interval == INTERVAL_HISTORY)
cur_interval = 0;
tz->stat[max_index].cool_temp[cur_interval] = cool_temp;
tz->stat[max_index].interval_ptr = cur_interval;
last_trip_level = max_index;
- } else {
tz->stat[max_index].cool_temp[cur_interval] += cool_temp;
Why do you need to sum up here? Wouldn't this break your statistics? (I may completely missed your idea for last_trip_level).
- }
+}
+static int calculate_cooling_temp_avg(struct thermal_zone_device *tz, int trip,
int *avg_cool)
+{
- int result = 0, count = 0, used_data = 0;
- if (trip > THERMAL_MAX_TRIPS)
Shouldn't this be checked against tz->trips ? At least you allocate your *stat based on it.
return -EINVAL;
- *avg_cool = 0;
- for (count = 0; count < INTERVAL_HISTORY; count++) {
if (tz->stat[trip].cool_temp[count] > 0) {
*avg_cool += tz->stat[trip].cool_temp[count];
used_data++;
}
- }
- if (used_data > 1)
*avg_cool = (*avg_cool)/used_data;
IIRC, the preferred operator style is (*avg_cool) / used_data
- return result;
result is used only to hold a 0 here?
+} /* sys I/F for thermal zone */ @@ -493,6 +551,26 @@ temp_crit_show(struct device *dev, struct device_attribute *attr, return sprintf(buf, "%ld\n", temperature); } +static ssize_t +thermal_cooling_trip_stats_show(struct device *dev,
struct device_attribute *attr,
char *buf)
+{
- struct thermal_zone_device *tz = to_thermal_zone(dev);
- int avg_cool = 0, result, trip_index;
- ssize_t len = 0;
- for (trip_index = 0; trip_index < tz->trips; trip_index++) {
result = calculate_cooling_temp_avg(tz,
trip_index, &avg_cool);
if (!result)
len += sprintf(buf + len, "%d %d\n",
trip_index, avg_cool);
- }
- return len;
+} +static DEVICE_ATTR(trip_stats, 0444,
thermal_cooling_trip_stats_show, NULL);
static struct thermal_hwmon_device * thermal_hwmon_lookup_by_type(const struct thermal_zone_device *tz) @@ -1049,6 +1127,8 @@ void thermal_zone_device_update(struct thermal_zone_device *tz) goto leave; }
- update_cooling_stats(tz, temp);
- for (count = 0; count < tz->trips; count++) { tz->ops->get_trip_type(tz, count, &trip_type); tz->ops->get_trip_temp(tz, count, &trip_temp);
@@ -1181,6 +1261,13 @@ struct thermal_zone_device *thermal_zone_device_register(char *type, return ERR_PTR(result); }
- /*Allocate variables for cooling stats*/
- tz->stat = devm_kzalloc(&tz->device,
sizeof(struct thermal_cooling_stats) * trips,
GFP_KERNEL);
You never free this memory here.
- if (!tz->stat)
goto unregister;
- /* sys I/F */ if (type) { result = device_create_file(&tz->device, &dev_attr_type);
@@ -1207,6 +1294,12 @@ struct thermal_zone_device *thermal_zone_device_register(char *type, passive = 1; }
- if (trips > 0) {
result = device_create_file(&tz->device, &dev_attr_trip_stats);
if (result)
goto unregister;
The failing paths after your allocation point must consider freeing the memory you requested.
- }
- if (!passive) result = device_create_file(&tz->device, &dev_attr_passive);
@@ -1282,6 +1375,9 @@ void thermal_zone_device_unregister(struct thermal_zone_device *tz) for (count = 0; count < tz->trips; count++) TRIP_POINT_ATTR_REMOVE(&tz->device, count);
- if (tz->trips > 0)
device_remove_file(&tz->device, &dev_attr_trip_stats);
Amit,
I think here it would be a good place to free the memory you allocated for *stat
thermal_remove_hwmon_sysfs(tz); release_idr(&thermal_tz_idr, &thermal_idr_lock, tz->id); idr_destroy(&tz->idr); diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 47b4a27..47504c7 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -72,6 +72,13 @@ struct thermal_cooling_device_ops { #define THERMAL_TRIPS_NONE -1 #define THERMAL_MAX_TRIPS 12 #define THERMAL_NAME_LENGTH 20 +#define INTERVAL_HISTORY 12
+struct thermal_cooling_stats {
- int cool_temp[INTERVAL_HISTORY];
- int interval_ptr;
+};
struct thermal_cooling_device { int id; char type[THERMAL_NAME_LENGTH]; @@ -102,6 +109,7 @@ struct thermal_zone_device { struct list_head cooling_devices; struct idr idr; struct mutex lock; /* protect cooling devices list */
- struct thermal_cooling_stats *stat; struct list_head node; struct delayed_work poll_queue;
};
1.7.1
linux-pm mailing list linux-pm@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/linux-pm