Linux内核Thermal框架详解十三、Thermal Governor(3)
接前一篇文章Linux内核Thermal框架详解十二、Thermal Governor(2)
二、具体温控策略
上一篇文章介绍并详细分析了bang_bang governor的源码。本文介绍第2种温控策略:fair_share。
2. fair_share
fair_share governor总的策略是频率档位比较多的cooling device优先降频。
fair_share governor的代码在drivers/thermal/gov_fair_share.c中,也很简短,一共才124行,有效代码不到100行。如下所示:
#include <linux/thermal.h>
#include <trace/events/thermal.h>

#include "thermal_core.h"

/**
 * get_trip_level: - obtains the current trip level for a zone
 * @tz:		thermal zone device
 *
 * Return: the number of leading trip points whose temperature is not above
 * the zone's current temperature; 0 if the zone has no trips or provides no
 * get_trip_temp callback.
 */
static int get_trip_level(struct thermal_zone_device *tz)
{
	int count = 0;
	int trip_temp;
	enum thermal_trip_type trip_type;

	if (tz->trips == 0 || !tz->ops->get_trip_temp)
		return 0;

	/* Stop at the first trip point the zone temperature is still below. */
	for (count = 0; count < tz->trips; count++) {
		tz->ops->get_trip_temp(tz, count, &trip_temp);
		if (tz->temperature < trip_temp)
			break;
	}

	/*
	 * count > 0 only if temperature is greater than first trip
	 * point, in which case, trip_point = count - 1
	 */
	if (count > 0) {
		tz->ops->get_trip_type(tz, count - 1, &trip_type);
		trace_thermal_zone_trip(tz, count - 1, trip_type);
	}

	return count;
}

/*
 * get_target_state - scale a cooling device's max_state by the device's
 * share (percentage / 100) and by the trip level (level / tz->trips).
 * Integer arithmetic is used, so the quotient is truncated.
 */
static long get_target_state(struct thermal_zone_device *tz,
		struct thermal_cooling_device *cdev, int percentage, int level)
{
	unsigned long max_state;

	cdev->ops->get_max_state(cdev, &max_state);

	return (long)(percentage * level * max_state) / (100 * tz->trips);
}

/**
 * fair_share_throttle - throttles devices associated with the given zone
 * @tz: thermal_zone_device
 * @trip: trip point index
 *
 * Throttling Logic: This uses three parameters to calculate the new
 * throttle state of the cooling devices associated with the given zone.
 *
 * Parameters used for Throttling:
 * P1. max_state: Maximum throttle state exposed by the cooling device.
 * P2. percentage[i]/100:
 *	How 'effective' the 'i'th device is, in cooling the given zone.
 * P3. cur_trip_level/max_no_of_trips:
 *	This describes the extent to which the devices should be throttled.
 *	We do not want to throttle too much when we trip a lower temperature,
 *	whereas the throttling is at full swing if we trip critical levels.
 *	(Heavily assumes the trip points are in ascending order)
 * new_state of cooling device = P3 * P2 * P1
 *
 * Return: always 0.
 */
static int fair_share_throttle(struct thermal_zone_device *tz, int trip)
{
	struct thermal_instance *instance;
	int total_weight = 0;
	int total_instance = 0;
	int cur_trip_level = get_trip_level(tz);

	mutex_lock(&tz->lock);

	/* First pass: sum the weights and count the instances bound to @trip. */
	list_for_each_entry(instance, &tz->thermal_instances, tz_node) {
		if (instance->trip != trip)
			continue;

		total_weight += instance->weight;
		total_instance++;
	}

	/* Second pass: compute each instance's share and apply the target. */
	list_for_each_entry(instance, &tz->thermal_instances, tz_node) {
		int percentage;
		struct thermal_cooling_device *cdev = instance->cdev;

		if (instance->trip != trip)
			continue;

		/* With no weights configured, split evenly between instances. */
		if (!total_weight)
			percentage = 100 / total_instance;
		else
			percentage = (instance->weight * 100) / total_weight;

		instance->target = get_target_state(tz, cdev, percentage,
						    cur_trip_level);

		mutex_lock(&cdev->lock);
		__thermal_cdev_update(cdev);
		mutex_unlock(&cdev->lock);
	}

	mutex_unlock(&tz->lock);
	return 0;
}

static struct thermal_governor thermal_gov_fair_share = {
	.name		= "fair_share",
	.throttle	= fair_share_throttle,
};
THERMAL_GOVERNOR_DECLARE(thermal_gov_fair_share);
同样是麻雀虽小,五脏俱全。别看代码行数比较少,但是背后的机制却并不简单。一段一段来进行分析。
(1)THERMAL_GOVERNOR_DECLARE相关代码
先来看THERMAL_GOVERNOR_DECLARE。它是一个宏定义,在drivers/thermal/thermal_core.h中,代码如下:
/* Init section thermal table */
extern struct thermal_governor *__governor_thermal_table[];
extern struct thermal_governor *__governor_thermal_table_end[];

/*
 * Define a static pointer to @name and place it in the section named
 * "__<table>_thermal_table".
 */
#define THERMAL_TABLE_ENTRY(table, name)			\
	static typeof(name) *__thermal_table_entry_##name	\
	__used __section("__" #table "_thermal_table") = &name

/* Register governor @name in the "__governor_thermal_table" section. */
#define THERMAL_GOVERNOR_DECLARE(name)	THERMAL_TABLE_ENTRY(governor, name)
实际上这段代码在前文Linux内核Thermal框架详解四、Thermal Core(3)中已经进行了详细分析,这里就不再赘述了。不过为了便于理解和加深印象,将fair_share governor展开后的代码再次列出:
static struct thermal_governor thermal_gov_fair_share = {
	.name		= "fair_share",
	.throttle	= fair_share_throttle,
};

/* Result of expanding THERMAL_GOVERNOR_DECLARE(thermal_gov_fair_share); */
static struct thermal_governor *__thermal_table_entry_thermal_gov_fair_share
	__used __section("__governor_thermal_table") = &thermal_gov_fair_share;
Thermal Governor都是通过THERMAL_GOVERNOR_DECLARE定义到了__governor_thermal_table这段空间内。然后在thermal core初始化时通过调用thermal_register_governors来注册到thermal_governor_list链表中。再之后经由“thermal_init -> thermal_register_governors -> thermal_set_governor”路径和thermal zone device关联上。
(2)handle_non_critical_trips
struct thermal_governor中有一个成员throttle,其是一个函数指针:
int (*throttle)(struct thermal_zone_device *tz, int trip);
对于对象thermal_gov_fair_share来说,指向了fair_share_throttle函数。在解析fair_share_throttle函数之前,有一个问题必须弄清楚:这个函数是何时被调用的?
是在drivers/thermal/thermal_core.c的handle_non_critical_trips函数中,代码如下:
/*
 * Forward a non-critical trip to a governor's throttle callback: the zone's
 * own governor when tz->governor is set, otherwise def_governor. The value
 * returned by the callback is discarded.
 */
static void handle_non_critical_trips(struct thermal_zone_device *tz, int trip)
{
	tz->governor ? tz->governor->throttle(tz, trip) :
		       def_governor->throttle(tz, trip);
}
那么又是哪里调用的handle_non_critical_trips?是在drivers/thermal/thermal_core.c的handle_thermal_trip函数中,代码如下:
static void handle_thermal_trip(struct thermal_zone_device *tz, int trip)
{enum thermal_trip_type type;int trip_temp, hyst = 0;/* Ignore disabled trip points */if (test_bit(trip, &tz->trips_disabled))return;tz->ops->get_trip_temp(tz, trip, &trip_temp);tz->ops->get_trip_type(tz, trip, &type);if (tz->ops->get_trip_hyst)tz->ops->get_trip_hyst(tz, trip, &hyst);if (tz->last_temperature != THERMAL_TEMP_INVALID) {if (tz->last_temperature < trip_temp &&tz->temperature >= trip_temp)thermal_notify_tz_trip_up(tz->id, trip,tz->temperature);if (tz->last_temperature >= trip_temp &&tz->temperature < (trip_temp - hyst))thermal_notify_tz_trip_down(tz->id, trip,tz->temperature);}if (type == THERMAL_TRIP_CRITICAL || type == THERMAL_TRIP_HOT)handle_critical_trips(tz, trip, type);elsehandle_non_critical_trips(tz, trip);/ Alright, we handled this trip successfully.* So, start monitoring again.*/monitor_thermal_zone(tz);
}
对于handle_thermal_trip函数的详细分析有专门的文章章节,由于本篇文章专注于fair_share governor,故在此不深入展开。
(3)fair_share_throttle
再贴一下此函数代码:
/**
 * fair_share_throttle - throttles devices associated with the given zone
 * @tz: thermal_zone_device
 * @trip: trip point index
 *
 * Throttling Logic: This uses three parameters to calculate the new
 * throttle state of the cooling devices associated with the given zone.
 *
 * Parameters used for Throttling:
 * P1. max_state: Maximum throttle state exposed by the cooling device.
 * P2. percentage[i]/100:
 *	How 'effective' the 'i'th device is, in cooling the given zone.
 * P3. cur_trip_level/max_no_of_trips:
 *	This describes the extent to which the devices should be throttled.
 *	We do not want to throttle too much when we trip a lower temperature,
 *	whereas the throttling is at full swing if we trip critical levels.
 *	(Heavily assumes the trip points are in ascending order)
 * new_state of cooling device = P3 * P2 * P1
 *
 * Return: always 0.
 */
static int fair_share_throttle(struct thermal_zone_device *tz, int trip)
{
	struct thermal_instance *instance;
	int total_weight = 0;
	int total_instance = 0;
	int cur_trip_level = get_trip_level(tz);

	mutex_lock(&tz->lock);

	/* First pass: sum the weights and count the instances bound to @trip. */
	list_for_each_entry(instance, &tz->thermal_instances, tz_node) {
		if (instance->trip != trip)
			continue;

		total_weight += instance->weight;
		total_instance++;
	}

	/* Second pass: compute each instance's share and apply the target. */
	list_for_each_entry(instance, &tz->thermal_instances, tz_node) {
		int percentage;
		struct thermal_cooling_device *cdev = instance->cdev;

		if (instance->trip != trip)
			continue;

		/* With no weights configured, split evenly between instances. */
		if (!total_weight)
			percentage = 100 / total_instance;
		else
			percentage = (instance->weight * 100) / total_weight;

		instance->target = get_target_state(tz, cdev, percentage,
						    cur_trip_level);

		mutex_lock(&cdev->lock);
		__thermal_cdev_update(cdev);
		mutex_unlock(&cdev->lock);
	}

	mutex_unlock(&tz->lock);
	return 0;
}
函数注释已经将函数功能说得很清楚了:对与给定thermal zone关联的设备进行节流。调节逻辑如下:
使用3个参数计算与给定thermal zone相关联的冷却设备的最新throttle state。
用于节流的3个参数(注意不是函数的参数):
- 参数1. max_state
冷却设备暴露的最大throttle state。
- 参数2. percentage[i]/100
第i个设备在冷却给定区域方面的“有效性”。
- 参数3. cur_trip_level/max_no_of_trips
这个参数描述设备应被节流的程度。当触发的是较低温度的触发点时,不需要节流太多;反之如果触发的是临界级别的触发点,节流就处于全开状态。(该逻辑在很大程度上假设各触发点按温度升序排列。)
new_state of cooling device = P3 * P2 * P1
代码的大致流程如下:
1)得到指定thermal zone的trip level
通过get_trip_level(tz)得到指定thermal zone的trip level。
get_trip_level函数在同文件(drivers/thermal/gov_fair_share.c)中实现,代码如下:
/**
 * get_trip_level: - obtains the current trip level for a zone
 * @tz:		thermal zone device
 *
 * Return: the number of leading trip points whose temperature is not above
 * the zone's current temperature; 0 if the zone has no trips or provides no
 * get_trip_temp callback.
 */
static int get_trip_level(struct thermal_zone_device *tz)
{
	int count = 0;
	int trip_temp;
	enum thermal_trip_type trip_type;

	if (tz->trips == 0 || !tz->ops->get_trip_temp)
		return 0;

	/* Stop at the first trip point the zone temperature is still below. */
	for (count = 0; count < tz->trips; count++) {
		tz->ops->get_trip_temp(tz, count, &trip_temp);
		if (tz->temperature < trip_temp)
			break;
	}

	/*
	 * count > 0 only if temperature is greater than first trip
	 * point, in which case, trip_point = count - 1
	 */
	if (count > 0) {
		tz->ops->get_trip_type(tz, count - 1, &trip_type);
		trace_thermal_zone_trip(tz, count - 1, trip_type);
	}

	return count;
}
依次遍历各个触发点(trips),并得到相应触发点的温度。如果给定thermal zone的温度小于某一触发点的温度,则跳出循环。
未完待续……