> 文章列表 > Linux内核Thermal框架详解十三、Thermal Governor(3)

Linux内核Thermal框架详解十三、Thermal Governor(3)

Linux内核Thermal框架详解十三、Thermal Governor(3)

接前一篇文章Linux内核Thermal框架详解十二、Thermal Governor(2)

二、具体温控策略

上一篇文章介绍并详细分析了bang_bang governor的源码。本文介绍第2种温控策略:fair_share。

2. fair_share

fair_share governor总的策略是频率档位比较多的cooling device优先降频。

fair_share governor的代码在drivers/thermal/gov_fair_share.c中,也很简短,一共才124行,有效代码不到100行。如下所示:

#include <linux/thermal.h>
#include <trace/events/thermal.h>

#include "thermal_core.h"

/**
 * get_trip_level: - obtains the current trip level for a zone
 * @tz:		thermal zone device
 *
 * Return: the index of the first trip point whose temperature is above the
 * current zone temperature, or tz->trips if there is none. Returns 0 when
 * the zone has no trip points or no get_trip_temp callback.
 */
static int get_trip_level(struct thermal_zone_device *tz)
{
	int count = 0;
	int trip_temp;
	enum thermal_trip_type trip_type;

	if (tz->trips == 0 || !tz->ops->get_trip_temp)
		return 0;

	for (count = 0; count < tz->trips; count++) {
		tz->ops->get_trip_temp(tz, count, &trip_temp);
		if (tz->temperature < trip_temp)
			break;
	}

	/*
	 * count > 0 only if temperature is greater than first trip
	 * point, in which case, trip_point = count - 1
	 */
	if (count > 0) {
		tz->ops->get_trip_type(tz, count - 1, &trip_type);
		trace_thermal_zone_trip(tz, count - 1, trip_type);
	}

	return count;
}

/*
 * get_target_state - compute the throttle state for one cooling device
 *
 * Scales the device's max_state by its share (percentage / 100) and by the
 * current trip level relative to the number of trips (level / tz->trips).
 */
static long get_target_state(struct thermal_zone_device *tz,
		struct thermal_cooling_device *cdev, int percentage, int level)
{
	unsigned long max_state;

	cdev->ops->get_max_state(cdev, &max_state);

	return (long)(percentage * level * max_state) / (100 * tz->trips);
}

/**
 * fair_share_throttle - throttles devices associated with the given zone
 * @tz: thermal_zone_device
 * @trip: trip point index
 *
 * Throttling Logic: This uses three parameters to calculate the new
 * throttle state of the cooling devices associated with the given zone.
 *
 * Parameters used for Throttling:
 * P1. max_state: Maximum throttle state exposed by the cooling device.
 * P2. percentage[i]/100:
 *	How 'effective' the 'i'th device is, in cooling the given zone.
 * P3. cur_trip_level/max_no_of_trips:
 *	This describes the extent to which the devices should be throttled.
 *	We do not want to throttle too much when we trip a lower temperature,
 *	whereas the throttling is at full swing if we trip critical levels.
 *	(Heavily assumes the trip points are in ascending order)
 * new_state of cooling device = P3 * P2 * P1
 */
static int fair_share_throttle(struct thermal_zone_device *tz, int trip)
{
	struct thermal_instance *instance;
	int total_weight = 0;
	int total_instance = 0;
	int cur_trip_level = get_trip_level(tz);

	mutex_lock(&tz->lock);

	/* First pass: sum the weights and count the instances bound to @trip. */
	list_for_each_entry(instance, &tz->thermal_instances, tz_node) {
		if (instance->trip != trip)
			continue;

		total_weight += instance->weight;
		total_instance++;
	}

	/* Second pass: give each bound instance its share and apply it. */
	list_for_each_entry(instance, &tz->thermal_instances, tz_node) {
		int percentage;
		struct thermal_cooling_device *cdev = instance->cdev;

		if (instance->trip != trip)
			continue;

		/* With no weights configured, split evenly between instances. */
		if (!total_weight)
			percentage = 100 / total_instance;
		else
			percentage = (instance->weight * 100) / total_weight;

		instance->target = get_target_state(tz, cdev, percentage,
						    cur_trip_level);

		mutex_lock(&cdev->lock);
		__thermal_cdev_update(cdev);
		mutex_unlock(&cdev->lock);
	}

	mutex_unlock(&tz->lock);
	return 0;
}

static struct thermal_governor thermal_gov_fair_share = {
	.name		= "fair_share",
	.throttle	= fair_share_throttle,
};
THERMAL_GOVERNOR_DECLARE(thermal_gov_fair_share);

同样是麻雀虽小,五脏俱全。别看代码行数比较少,但是背后的机制却并不简单。一段一段来进行分析。

(1)THERMAL_GOVERNOR_DECLARE相关代码

先来看THERMAL_GOVERNOR_DECLARE。它是一个宏定义,在drivers/thermal/thermal_core.h中,代码如下:

/* Init section thermal table */
extern struct thermal_governor *__governor_thermal_table[];
extern struct thermal_governor *__governor_thermal_table_end[];

/*
 * Define a static pointer named __thermal_table_entry_<name> that points to
 * @name, marked __used and placed in the section "__<table>_thermal_table".
 */
#define THERMAL_TABLE_ENTRY(table, name)			\
	static typeof(name) *__thermal_table_entry_##name	\
	__used __section("__" #table "_thermal_table") = &name

/* Place a governor's pointer in the "__governor_thermal_table" section. */
#define THERMAL_GOVERNOR_DECLARE(name)	THERMAL_TABLE_ENTRY(governor, name)

实际上这段代码在前文Linux内核Thermal框架详解四、Thermal Core(3)中已经进行了详细分析,这里就不再赘述了。不过为了便于理解和加深印象,将fair_share governor展开后的代码再次列出:

static struct thermal_governor thermal_gov_fair_share = {
	.name		= "fair_share",
	.throttle	= fair_share_throttle,
};

/* Result of expanding THERMAL_GOVERNOR_DECLARE(thermal_gov_fair_share); */
static struct thermal_governor *__thermal_table_entry_thermal_gov_fair_share
	__used __section("__governor_thermal_table") = &thermal_gov_fair_share;

Thermal Governor都是通过THERMAL_GOVERNOR_DECLARE定义到__governor_thermal_table这段空间内的,然后在thermal core初始化时通过调用thermal_register_governors注册到thermal_governor_list链表中,之后再经由“thermal_init -> thermal_register_governors -> thermal_set_governor”路径与thermal zone device关联上。

(2)handle_non_critical_trips

struct thermal_governor中有一个成员throttle,其是一个函数指针:

int (*throttle)(struct thermal_zone_device *tz, int trip);

对于对象thermal_gov_fair_share来说,指向了fair_share_throttle函数。在解析fair_share_throttle函数之前,有一个问题必须弄清楚:这个函数是何时被调用的?

是在drivers/thermal/thermal_core.c的handle_non_critical_trips函数中,代码如下:

/*
 * Pass a non-critical trip to the governor's throttle callback: the zone's
 * own governor when one is set, otherwise def_governor.
 */
static void handle_non_critical_trips(struct thermal_zone_device *tz, int trip)
{
	if (tz->governor)
		tz->governor->throttle(tz, trip);
	else
		def_governor->throttle(tz, trip);
}

那么又是哪里调用的handle_non_critical_trips?是在drivers/thermal/thermal_core.c的handle_thermal_trip函数中,代码如下:

static void handle_thermal_trip(struct thermal_zone_device *tz, int trip)
{enum thermal_trip_type type;int trip_temp, hyst = 0;/* Ignore disabled trip points */if (test_bit(trip, &tz->trips_disabled))return;tz->ops->get_trip_temp(tz, trip, &trip_temp);tz->ops->get_trip_type(tz, trip, &type);if (tz->ops->get_trip_hyst)tz->ops->get_trip_hyst(tz, trip, &hyst);if (tz->last_temperature != THERMAL_TEMP_INVALID) {if (tz->last_temperature < trip_temp &&tz->temperature >= trip_temp)thermal_notify_tz_trip_up(tz->id, trip,tz->temperature);if (tz->last_temperature >= trip_temp &&tz->temperature < (trip_temp - hyst))thermal_notify_tz_trip_down(tz->id, trip,tz->temperature);}if (type == THERMAL_TRIP_CRITICAL || type == THERMAL_TRIP_HOT)handle_critical_trips(tz, trip, type);elsehandle_non_critical_trips(tz, trip);/ Alright, we handled this trip successfully.* So, start monitoring again.*/monitor_thermal_zone(tz);
}

对于handle_thermal_trip函数的详细分析有专门的文章章节,由于本篇文章专注于fair_share governor,故在此不深入展开。

(3)fair_share_throttle

再贴一下此函数代码:

/**
 * fair_share_throttle - throttles devices associated with the given zone
 * @tz: thermal_zone_device
 * @trip: trip point index
 *
 * Throttling Logic: This uses three parameters to calculate the new
 * throttle state of the cooling devices associated with the given zone.
 *
 * Parameters used for Throttling:
 * P1. max_state: Maximum throttle state exposed by the cooling device.
 * P2. percentage[i]/100:
 *	How 'effective' the 'i'th device is, in cooling the given zone.
 * P3. cur_trip_level/max_no_of_trips:
 *	This describes the extent to which the devices should be throttled.
 *	We do not want to throttle too much when we trip a lower temperature,
 *	whereas the throttling is at full swing if we trip critical levels.
 *	(Heavily assumes the trip points are in ascending order)
 * new_state of cooling device = P3 * P2 * P1
 */
static int fair_share_throttle(struct thermal_zone_device *tz, int trip)
{
	struct thermal_instance *instance;
	int total_weight = 0;
	int total_instance = 0;
	int cur_trip_level = get_trip_level(tz);

	mutex_lock(&tz->lock);

	/* First pass: sum the weights and count the instances bound to @trip. */
	list_for_each_entry(instance, &tz->thermal_instances, tz_node) {
		if (instance->trip != trip)
			continue;

		total_weight += instance->weight;
		total_instance++;
	}

	/* Second pass: give each bound instance its share and apply it. */
	list_for_each_entry(instance, &tz->thermal_instances, tz_node) {
		int percentage;
		struct thermal_cooling_device *cdev = instance->cdev;

		if (instance->trip != trip)
			continue;

		/* With no weights configured, split evenly between instances. */
		if (!total_weight)
			percentage = 100 / total_instance;
		else
			percentage = (instance->weight * 100) / total_weight;

		instance->target = get_target_state(tz, cdev, percentage,
						    cur_trip_level);

		mutex_lock(&cdev->lock);
		__thermal_cdev_update(cdev);
		mutex_unlock(&cdev->lock);
	}

	mutex_unlock(&tz->lock);
	return 0;
}

函数注释已经将函数功能说得很清楚了:对与给定thermal zone关联的设备进行节流。调节逻辑如下:

使用3个参数计算与给定thermal zone相关联的冷却设备的最新throttle state。

用于节流的3个参数(注意不是函数的参数):

  • 参数1. max_state

冷却设备暴露的最大throttle state。

  • 参数2. percentage[i]/100

第i个设备在冷却给定区域方面的“有效性”。

  • 参数3. cur_trip_level/max_no_of_trips

这个参数描述设备应被节流的程度。当触发的是较低温度的触发点时,不需要节流太多;反之,如果触发的是临界(critical)级别的触发点,节流就会全力进行。(这在很大程度上假设各触发点按升序排列。)

new_state of cooling device = P3 * P2 * P1

代码的大致流程如下:

1)得到指定thermal zone的trip level

通过get_trip_level(tz)得到指定thermal zone的trip level。

get_trip_level函数在同文件(drivers/thermal/gov_fair_share.c)中实现,代码如下:

/**
 * get_trip_level: - obtains the current trip level for a zone
 * @tz:		thermal zone device
 *
 * Return: the index of the first trip point whose temperature is above the
 * current zone temperature, or tz->trips if there is none. Returns 0 when
 * the zone has no trip points or no get_trip_temp callback.
 */
static int get_trip_level(struct thermal_zone_device *tz)
{
	int count = 0;
	int trip_temp;
	enum thermal_trip_type trip_type;

	if (tz->trips == 0 || !tz->ops->get_trip_temp)
		return 0;

	for (count = 0; count < tz->trips; count++) {
		tz->ops->get_trip_temp(tz, count, &trip_temp);
		if (tz->temperature < trip_temp)
			break;
	}

	/*
	 * count > 0 only if temperature is greater than first trip
	 * point, in which case, trip_point = count - 1
	 */
	if (count > 0) {
		tz->ops->get_trip_type(tz, count - 1, &trip_type);
		trace_thermal_zone_trip(tz, count - 1, trip_type);
	}

	return count;
}

依次遍历各个触发点(trips),并得到相应触发点的温度。如果给定thermal zone的温度小于某一触发点的温度,则跳出循环。

未完待续……