2023-03-12 00:55:40 +00:00
|
|
|
From 8cb6f5bf4f355b0169e3f60c559a5e2203c1a988 Mon Sep 17 00:00:00 2001
|
2023-03-06 23:14:39 +00:00
|
|
|
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
|
|
|
|
Date: Tue, 18 Oct 2022 04:22:40 -0700
|
|
|
|
Subject: [PATCH] thermal: intel: hfi: Improve the type of
|
|
|
|
hfi_features::nr_table_pages
|
|
|
|
|
|
|
|
A Coverity static code scan raised a potential overflow_before_widen
|
|
|
|
warning when hfi_features::nr_table_pages is used as an argument to
|
|
|
|
memcpy in intel_hfi_process_event().
|
|
|
|
|
|
|
|
Even though the overflow can never happen (the maximum number of pages of
|
|
|
|
the HFI table is 0x10 and 0x10 << PAGE_SHIFT = 0x10000), using size_t as
|
|
|
|
the data type of hfi_features::nr_table_pages makes Coverity happy and
|
|
|
|
matches the data type of the argument 'size' of memcpy().
|
|
|
|
|
|
|
|
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
|
|
|
|
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
|
|
|
|
Patchset: intel-thread-director
|
|
|
|
---
|
|
|
|
drivers/thermal/intel/intel_hfi.c | 2 +-
|
|
|
|
1 file changed, 1 insertion(+), 1 deletion(-)
|
|
|
|
|
|
|
|
diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c
|
2023-03-12 00:55:40 +00:00
|
|
|
index a0640f762dc5d..239afe02e5182 100644
|
2023-03-06 23:14:39 +00:00
|
|
|
--- a/drivers/thermal/intel/intel_hfi.c
|
|
|
|
+++ b/drivers/thermal/intel/intel_hfi.c
|
|
|
|
@@ -137,7 +137,7 @@ struct hfi_instance {
|
|
|
|
* Parameters and supported features that are common to all HFI instances
|
|
|
|
*/
|
|
|
|
struct hfi_features {
|
|
|
|
- unsigned int nr_table_pages;
|
|
|
|
+ size_t nr_table_pages;
|
|
|
|
unsigned int cpu_stride;
|
|
|
|
unsigned int hdr_size;
|
|
|
|
};
|
|
|
|
--
|
|
|
|
2.39.2
|
|
|
|
|
2023-03-12 00:55:40 +00:00
|
|
|
From 8f590ad883a1330a99f0bbd18d76c4631d7fddbb Mon Sep 17 00:00:00 2001
|
2023-03-06 23:14:39 +00:00
|
|
|
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
|
|
|
|
Date: Mon, 6 Feb 2023 20:58:29 -0800
|
|
|
|
Subject: [PATCH] sched/fair: Generalize asym_packing logic for SMT cores
|
|
|
|
|
|
|
|
When doing asym_packing load balancing between cores, all we care is that
|
|
|
|
the destination core is fully idle (including SMT siblings, if any) and
|
|
|
|
that the busiest candidate scheduling group has exactly one busy CPU. It is
|
|
|
|
irrelevant whether the candidate busiest core is non-SMT, SMT2, SMT4, SMT8,
|
|
|
|
etc.
|
|
|
|
|
|
|
|
Do not handle the candidate busiest non-SMT vs SMT cases separately. Simply
|
|
|
|
do the two checks described above. Let find_busiest_group() handle bigger
|
|
|
|
imbalances in the number of idle CPUs.
|
|
|
|
|
|
|
|
Cc: Ben Segall <bsegall@google.com>
|
|
|
|
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
|
|
|
|
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
|
|
|
|
Cc: Len Brown <len.brown@intel.com>
|
|
|
|
Cc: Mel Gorman <mgorman@suse.de>
|
|
|
|
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
|
|
|
|
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
|
|
|
|
Cc: Steven Rostedt <rostedt@goodmis.org>
|
|
|
|
Cc: Tim C. Chen <tim.c.chen@intel.com>
|
|
|
|
Cc: Valentin Schneider <vschneid@redhat.com>
|
|
|
|
Cc: x86@kernel.org
|
|
|
|
Cc: linux-kernel@vger.kernel.org
|
|
|
|
Reviewed-by: Len Brown <len.brown@intel.com>
|
|
|
|
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
|
|
|
|
Tested-by: Zhang Rui <rui.zhang@intel.com>
|
|
|
|
Patchset: intel-thread-director
|
|
|
|
---
|
|
|
|
kernel/sched/fair.c | 41 ++++++++++++++---------------------------
|
|
|
|
1 file changed, 14 insertions(+), 27 deletions(-)
|
|
|
|
|
|
|
|
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
|
2023-03-12 00:55:40 +00:00
|
|
|
index 2c3d0d49c80ea..8b5fc8e86addb 100644
|
2023-03-06 23:14:39 +00:00
|
|
|
--- a/kernel/sched/fair.c
|
|
|
|
+++ b/kernel/sched/fair.c
|
|
|
|
@@ -9042,13 +9042,11 @@ group_type group_classify(unsigned int imbalance_pct,
|
|
|
|
* the SMT siblings of @sg are busy. If only one CPU in @sg is busy, pull tasks
|
|
|
|
* only if @dst_cpu has higher priority.
|
|
|
|
*
|
|
|
|
- * If both @dst_cpu and @sg have SMT siblings, and @sg has exactly one more
|
|
|
|
- * busy CPU than @sds::local, let @dst_cpu pull tasks if it has higher priority.
|
|
|
|
- * Bigger imbalances in the number of busy CPUs will be dealt with in
|
|
|
|
- * update_sd_pick_busiest().
|
|
|
|
- *
|
|
|
|
- * If @sg does not have SMT siblings, only pull tasks if all of the SMT siblings
|
|
|
|
- * of @dst_cpu are idle and @sg has lower priority.
|
|
|
|
+ * If @dst_cpu has SMT siblings, check if there are no running tasks in
|
|
|
|
+ * @sds::local. In such case, decide based on the priority of @sg. Do it only
|
|
|
|
+ * if @sg has exactly one busy CPU (i.e., one more than @sds::local). Bigger
|
|
|
|
+ * imbalances in the number of busy CPUs will be dealt with in
|
|
|
|
+ * find_busiest_group().
|
|
|
|
*
|
|
|
|
* Return: true if @dst_cpu can pull tasks, false otherwise.
|
|
|
|
*/
|
|
|
|
@@ -9057,12 +9055,10 @@ static bool asym_smt_can_pull_tasks(int dst_cpu, struct sd_lb_stats *sds,
|
|
|
|
struct sched_group *sg)
|
|
|
|
{
|
|
|
|
#ifdef CONFIG_SCHED_SMT
|
|
|
|
- bool local_is_smt, sg_is_smt;
|
|
|
|
+ bool local_is_smt;
|
|
|
|
int sg_busy_cpus;
|
|
|
|
|
|
|
|
local_is_smt = sds->local->flags & SD_SHARE_CPUCAPACITY;
|
|
|
|
- sg_is_smt = sg->flags & SD_SHARE_CPUCAPACITY;
|
|
|
|
-
|
|
|
|
sg_busy_cpus = sgs->group_weight - sgs->idle_cpus;
|
|
|
|
|
|
|
|
if (!local_is_smt) {
|
|
|
|
@@ -9083,25 +9079,16 @@ static bool asym_smt_can_pull_tasks(int dst_cpu, struct sd_lb_stats *sds,
|
|
|
|
return sched_asym_prefer(dst_cpu, sg->asym_prefer_cpu);
|
|
|
|
}
|
|
|
|
|
|
|
|
- /* @dst_cpu has SMT siblings. */
|
|
|
|
-
|
|
|
|
- if (sg_is_smt) {
|
|
|
|
- int local_busy_cpus = sds->local->group_weight -
|
|
|
|
- sds->local_stat.idle_cpus;
|
|
|
|
- int busy_cpus_delta = sg_busy_cpus - local_busy_cpus;
|
|
|
|
-
|
|
|
|
- if (busy_cpus_delta == 1)
|
|
|
|
- return sched_asym_prefer(dst_cpu, sg->asym_prefer_cpu);
|
|
|
|
-
|
|
|
|
- return false;
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
/*
|
|
|
|
- * @sg does not have SMT siblings. Ensure that @sds::local does not end
|
|
|
|
- * up with more than one busy SMT sibling and only pull tasks if there
|
|
|
|
- * are not busy CPUs (i.e., no CPU has running tasks).
|
|
|
|
+ * @dst_cpu has SMT siblings. Do asym_packing load balancing only if
|
|
|
|
+ * all its siblings are idle (moving tasks between physical cores in
|
|
|
|
+ * which some SMT siblings are busy results in the same throughput).
|
|
|
|
+ *
|
|
|
|
+ * If the difference in the number of busy CPUs is two or more, let
|
|
|
|
+ * find_busiest_group() take care of it. We only care if @sg has
|
|
|
|
+ * exactly one busy CPU. This covers SMT and non-SMT sched groups.
|
|
|
|
*/
|
|
|
|
- if (!sds->local_stat.sum_nr_running)
|
|
|
|
+ if (sg_busy_cpus == 1 && !sds->local_stat.sum_nr_running)
|
|
|
|
return sched_asym_prefer(dst_cpu, sg->asym_prefer_cpu);
|
|
|
|
|
|
|
|
return false;
|
|
|
|
--
|
|
|
|
2.39.2
|
|
|
|
|
2023-03-12 00:55:40 +00:00
|
|
|
From 5a3b5eb5f79e51634f5fd173c0949c5293c93566 Mon Sep 17 00:00:00 2001
|
2023-03-06 23:14:39 +00:00
|
|
|
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
|
|
|
|
Date: Mon, 6 Feb 2023 20:58:30 -0800
|
|
|
|
Subject: [PATCH] sched/fair: Move is_core_idle() out of CONFIG_NUMA
|
|
|
|
|
|
|
|
asym_packing needs this function to determine whether an SMT core is a
|
|
|
|
suitable destination for load balancing.
|
|
|
|
|
|
|
|
Cc: Ben Segall <bsegall@google.com>
|
|
|
|
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
|
|
|
|
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
|
|
|
|
Cc: Len Brown <len.brown@intel.com>
|
|
|
|
Cc: Mel Gorman <mgorman@suse.de>
|
|
|
|
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
|
|
|
|
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
|
|
|
|
Cc: Steven Rostedt <rostedt@goodmis.org>
|
|
|
|
Cc: Tim C. Chen <tim.c.chen@intel.com>
|
|
|
|
Cc: Valentin Schneider <vschneid@redhat.com>
|
|
|
|
Cc: x86@kernel.org
|
|
|
|
Cc: linux-kernel@vger.kernel.org
|
|
|
|
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
|
|
|
|
Tested-by: Zhang Rui <rui.zhang@intel.com>
|
|
|
|
Patchset: intel-thread-director
|
|
|
|
---
|
|
|
|
kernel/sched/fair.c | 34 +++++++++++++++++-----------------
|
|
|
|
1 file changed, 17 insertions(+), 17 deletions(-)
|
|
|
|
|
|
|
|
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
|
2023-03-12 00:55:40 +00:00
|
|
|
index 8b5fc8e86addb..98c64f1db20e0 100644
|
2023-03-06 23:14:39 +00:00
|
|
|
--- a/kernel/sched/fair.c
|
|
|
|
+++ b/kernel/sched/fair.c
|
|
|
|
@@ -1049,6 +1049,23 @@ update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
|
|
|
* Scheduling class queueing methods:
|
|
|
|
*/
|
|
|
|
|
|
|
|
+static inline bool is_core_idle(int cpu)
|
|
|
|
+{
|
|
|
|
+#ifdef CONFIG_SCHED_SMT
|
|
|
|
+ int sibling;
|
|
|
|
+
|
|
|
|
+ for_each_cpu(sibling, cpu_smt_mask(cpu)) {
|
|
|
|
+ if (cpu == sibling)
|
|
|
|
+ continue;
|
|
|
|
+
|
|
|
|
+ if (!idle_cpu(sibling))
|
|
|
|
+ return false;
|
|
|
|
+ }
|
|
|
|
+#endif
|
|
|
|
+
|
|
|
|
+ return true;
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
#ifdef CONFIG_NUMA
|
|
|
|
#define NUMA_IMBALANCE_MIN 2
|
|
|
|
|
|
|
|
@@ -1688,23 +1705,6 @@ struct numa_stats {
|
|
|
|
int idle_cpu;
|
|
|
|
};
|
|
|
|
|
|
|
|
-static inline bool is_core_idle(int cpu)
|
|
|
|
-{
|
|
|
|
-#ifdef CONFIG_SCHED_SMT
|
|
|
|
- int sibling;
|
|
|
|
-
|
|
|
|
- for_each_cpu(sibling, cpu_smt_mask(cpu)) {
|
|
|
|
- if (cpu == sibling)
|
|
|
|
- continue;
|
|
|
|
-
|
|
|
|
- if (!idle_cpu(sibling))
|
|
|
|
- return false;
|
|
|
|
- }
|
|
|
|
-#endif
|
|
|
|
-
|
|
|
|
- return true;
|
|
|
|
-}
|
|
|
|
-
|
|
|
|
struct task_numa_env {
|
|
|
|
struct task_struct *p;
|
|
|
|
|
|
|
|
--
|
|
|
|
2.39.2
|
|
|
|
|
2023-03-12 00:55:40 +00:00
|
|
|
From d4ba60a8be784dc7ed866fb52ff94519eb9d1586 Mon Sep 17 00:00:00 2001
|
2023-03-06 23:14:39 +00:00
|
|
|
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
|
|
|
|
Date: Mon, 6 Feb 2023 20:58:31 -0800
|
|
|
|
Subject: [PATCH] sched/fair: Only do asym_packing load balancing from fully
|
|
|
|
idle SMT cores
|
|
|
|
|
|
|
|
When balancing load between cores, all the SMT siblings of the destination
|
|
|
|
CPU, if any, must be idle. Otherwise, pulling new tasks degrades the
|
|
|
|
throughput of the busy SMT siblings. The overall throughput of the system
|
|
|
|
remains the same.
|
|
|
|
|
|
|
|
When balancing load within an SMT core this consideration is not relevant
|
|
|
|
Follow the priorities that hardware indicates.
|
|
|
|
|
|
|
|
Using is_core_idle() renders checking !sds->local_stat.sum_nr_running
|
|
|
|
redundant. Remove it.
|
|
|
|
|
|
|
|
Cc: Ben Segall <bsegall@google.com>
|
|
|
|
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
|
|
|
|
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
|
|
|
|
Cc: Len Brown <len.brown@intel.com>
|
|
|
|
Cc: Mel Gorman <mgorman@suse.de>
|
|
|
|
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
|
|
|
|
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
|
|
|
|
Cc: Steven Rostedt <rostedt@goodmis.org>
|
|
|
|
Cc: Tim C. Chen <tim.c.chen@intel.com>
|
|
|
|
Cc: Valentin Schneider <vschneid@redhat.com>
|
|
|
|
Cc: x86@kernel.org
|
|
|
|
Cc: linux-kernel@vger.kernel.org
|
|
|
|
Suggested-by: Valentin Schneider <vschneid@redhat.com>
|
|
|
|
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
|
|
|
|
Tested-by: Zhang Rui <rui.zhang@intel.com>
|
|
|
|
Patchset: intel-thread-director
|
|
|
|
---
|
|
|
|
kernel/sched/fair.c | 34 +++++++++++++++++++++++++---------
|
|
|
|
1 file changed, 25 insertions(+), 9 deletions(-)
|
|
|
|
|
|
|
|
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
|
2023-03-12 00:55:40 +00:00
|
|
|
index 98c64f1db20e0..f74777fc78d7d 100644
|
2023-03-06 23:14:39 +00:00
|
|
|
--- a/kernel/sched/fair.c
|
|
|
|
+++ b/kernel/sched/fair.c
|
|
|
|
@@ -9038,12 +9038,14 @@ group_type group_classify(unsigned int imbalance_pct,
|
|
|
|
* Check the state of the SMT siblings of both @sds::local and @sg and decide
|
|
|
|
* if @dst_cpu can pull tasks.
|
|
|
|
*
|
|
|
|
+ * This function must be called only if all the SMT siblings of @dst_cpu are
|
|
|
|
+ * idle, if any.
|
|
|
|
+ *
|
|
|
|
* If @dst_cpu does not have SMT siblings, it can pull tasks if two or more of
|
|
|
|
* the SMT siblings of @sg are busy. If only one CPU in @sg is busy, pull tasks
|
|
|
|
* only if @dst_cpu has higher priority.
|
|
|
|
*
|
|
|
|
- * If @dst_cpu has SMT siblings, check if there are no running tasks in
|
|
|
|
- * @sds::local. In such case, decide based on the priority of @sg. Do it only
|
|
|
|
+ * If @dst_cpu has SMT siblings, decide based on the priority of @sg. Do it only
|
|
|
|
* if @sg has exactly one busy CPU (i.e., one more than @sds::local). Bigger
|
|
|
|
* imbalances in the number of busy CPUs will be dealt with in
|
|
|
|
* find_busiest_group().
|
|
|
|
@@ -9080,15 +9082,13 @@ static bool asym_smt_can_pull_tasks(int dst_cpu, struct sd_lb_stats *sds,
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
- * @dst_cpu has SMT siblings. Do asym_packing load balancing only if
|
|
|
|
- * all its siblings are idle (moving tasks between physical cores in
|
|
|
|
- * which some SMT siblings are busy results in the same throughput).
|
|
|
|
+ * @dst_cpu has SMT siblings and are also idle.
|
|
|
|
*
|
|
|
|
* If the difference in the number of busy CPUs is two or more, let
|
|
|
|
* find_busiest_group() take care of it. We only care if @sg has
|
|
|
|
* exactly one busy CPU. This covers SMT and non-SMT sched groups.
|
|
|
|
*/
|
|
|
|
- if (sg_busy_cpus == 1 && !sds->local_stat.sum_nr_running)
|
|
|
|
+ if (sg_busy_cpus == 1)
|
|
|
|
return sched_asym_prefer(dst_cpu, sg->asym_prefer_cpu);
|
|
|
|
|
|
|
|
return false;
|
|
|
|
@@ -9102,7 +9102,14 @@ static inline bool
|
|
|
|
sched_asym(struct lb_env *env, struct sd_lb_stats *sds, struct sg_lb_stats *sgs,
|
|
|
|
struct sched_group *group)
|
|
|
|
{
|
|
|
|
- /* Only do SMT checks if either local or candidate have SMT siblings */
|
|
|
|
+ /*
|
|
|
|
+ * If the destination CPU has SMT siblings, env->idle != CPU_NOT_IDLE
|
|
|
|
+ * is not sufficient. We need to make sure the whole core is idle.
|
|
|
|
+ */
|
|
|
|
+ if (sds->local->flags & SD_SHARE_CPUCAPACITY && !is_core_idle(env->dst_cpu))
|
|
|
|
+ return false;
|
|
|
|
+
|
|
|
|
+ /* Only do SMT checks if either local or candidate have SMT siblings. */
|
|
|
|
if ((sds->local->flags & SD_SHARE_CPUCAPACITY) ||
|
|
|
|
(group->flags & SD_SHARE_CPUCAPACITY))
|
|
|
|
return asym_smt_can_pull_tasks(env->dst_cpu, sds, sgs, group);
|
|
|
|
@@ -11049,8 +11056,17 @@ static void nohz_balancer_kick(struct rq *rq)
|
|
|
|
*/
|
|
|
|
for_each_cpu_and(i, sched_domain_span(sd), nohz.idle_cpus_mask) {
|
|
|
|
if (sched_asym_prefer(i, cpu)) {
|
|
|
|
- flags = NOHZ_STATS_KICK | NOHZ_BALANCE_KICK;
|
|
|
|
- goto unlock;
|
|
|
|
+ /*
|
|
|
|
+ * Always do ASYM_PACKING balance in the SMT
|
|
|
|
+ * domain. In upper domains, the core must be
|
|
|
|
+ * fully idle.
|
|
|
|
+ */
|
|
|
|
+ if (sd->flags & SD_SHARE_CPUCAPACITY ||
|
|
|
|
+ (!(sd->flags & SD_SHARE_CPUCAPACITY) &&
|
|
|
|
+ is_core_idle(i))) {
|
|
|
|
+ flags = NOHZ_STATS_KICK | NOHZ_BALANCE_KICK;
|
|
|
|
+ goto unlock;
|
|
|
|
+ }
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
--
|
|
|
|
2.39.2
|
|
|
|
|
2023-03-12 00:55:40 +00:00
|
|
|
From 20641917bc6bad6f81bafe1bac213c1c3e70ed09 Mon Sep 17 00:00:00 2001
|
2023-03-06 23:14:39 +00:00
|
|
|
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
|
|
|
|
Date: Mon, 6 Feb 2023 20:58:32 -0800
|
|
|
|
Subject: [PATCH] sched/fair: Let low-priority cores help high-priority busy
|
|
|
|
SMT cores
|
|
|
|
|
|
|
|
Using asym_packing priorities within an SMT core is straightforward. Just
|
|
|
|
follow the priorities that hardware indicates.
|
|
|
|
|
|
|
|
When balancing load from an SMT core, also consider the idle of its
|
|
|
|
siblings. Priorities do not reflect that an SMT core divides its throughput
|
|
|
|
among all its busy siblings. They only make sense when exactly one sibling
|
|
|
|
is busy.
|
|
|
|
|
|
|
|
Indicate that active balance is needed if the destination CPU has lower
|
|
|
|
priority than the source CPU but the latter has busy SMT siblings.
|
|
|
|
|
|
|
|
Make find_busiest_queue() not skip higher-priority SMT cores with more than
|
|
|
|
one busy sibling.
|
|
|
|
|
|
|
|
Cc: Ben Segall <bsegall@google.com>
|
|
|
|
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
|
|
|
|
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
|
|
|
|
Cc: Len Brown <len.brown@intel.com>
|
|
|
|
Cc: Mel Gorman <mgorman@suse.de>
|
|
|
|
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
|
|
|
|
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
|
|
|
|
Cc: Steven Rostedt <rostedt@goodmis.org>
|
|
|
|
Cc: Tim C. Chen <tim.c.chen@intel.com>
|
|
|
|
Cc: Valentin Schneider <vschneid@redhat.com>
|
|
|
|
Cc: x86@kernel.org
|
|
|
|
Cc: linux-kernel@vger.kernel.org
|
|
|
|
Suggested-by: Valentin Schneider <vschneid@redhat.com>
|
|
|
|
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
|
|
|
|
Tested-by: Zhang Rui <rui.zhang@intel.com>
|
|
|
|
Patchset: intel-thread-director
|
|
|
|
---
|
|
|
|
kernel/sched/fair.c | 31 ++++++++++++++++++++++++++-----
|
|
|
|
1 file changed, 26 insertions(+), 5 deletions(-)
|
|
|
|
|
|
|
|
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
|
2023-03-12 00:55:40 +00:00
|
|
|
index f74777fc78d7d..24183e3eb3d47 100644
|
2023-03-06 23:14:39 +00:00
|
|
|
--- a/kernel/sched/fair.c
|
|
|
|
+++ b/kernel/sched/fair.c
|
|
|
|
@@ -10224,11 +10224,20 @@ static struct rq *find_busiest_queue(struct lb_env *env,
|
|
|
|
nr_running == 1)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
- /* Make sure we only pull tasks from a CPU of lower priority */
|
|
|
|
+ /*
|
|
|
|
+ * Make sure we only pull tasks from a CPU of lower priority
|
|
|
|
+ * when balancing between SMT siblings.
|
|
|
|
+ *
|
|
|
|
+ * If balancing between cores, let lower priority CPUs help
|
|
|
|
+ * SMT cores with more than one busy sibling.
|
|
|
|
+ */
|
|
|
|
if ((env->sd->flags & SD_ASYM_PACKING) &&
|
|
|
|
sched_asym_prefer(i, env->dst_cpu) &&
|
|
|
|
- nr_running == 1)
|
|
|
|
- continue;
|
|
|
|
+ nr_running == 1) {
|
|
|
|
+ if (env->sd->flags & SD_SHARE_CPUCAPACITY ||
|
|
|
|
+ (!(env->sd->flags & SD_SHARE_CPUCAPACITY) && is_core_idle(i)))
|
|
|
|
+ continue;
|
|
|
|
+ }
|
|
|
|
|
|
|
|
switch (env->migration_type) {
|
|
|
|
case migrate_load:
|
|
|
|
@@ -10318,8 +10327,20 @@ asym_active_balance(struct lb_env *env)
|
|
|
|
* lower priority CPUs in order to pack all tasks in the
|
|
|
|
* highest priority CPUs.
|
|
|
|
*/
|
|
|
|
- return env->idle != CPU_NOT_IDLE && (env->sd->flags & SD_ASYM_PACKING) &&
|
|
|
|
- sched_asym_prefer(env->dst_cpu, env->src_cpu);
|
|
|
|
+ if (env->idle != CPU_NOT_IDLE && (env->sd->flags & SD_ASYM_PACKING)) {
|
|
|
|
+ /* Always obey priorities between SMT siblings. */
|
|
|
|
+ if (env->sd->flags & SD_SHARE_CPUCAPACITY)
|
|
|
|
+ return sched_asym_prefer(env->dst_cpu, env->src_cpu);
|
|
|
|
+
|
|
|
|
+ /*
|
|
|
|
+ * A lower priority CPU can help an SMT core with more than one
|
|
|
|
+ * busy sibling.
|
|
|
|
+ */
|
|
|
|
+ return sched_asym_prefer(env->dst_cpu, env->src_cpu) ||
|
|
|
|
+ !is_core_idle(env->src_cpu);
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline bool
|
|
|
|
--
|
|
|
|
2.39.2
|
|
|
|
|
2023-03-12 00:55:40 +00:00
|
|
|
From c1e77e8eea55b2d16b80c6dde3bcb3cf7e232aa5 Mon Sep 17 00:00:00 2001
|
2023-03-06 23:14:39 +00:00
|
|
|
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
|
|
|
|
Date: Mon, 6 Feb 2023 20:58:33 -0800
|
|
|
|
Subject: [PATCH] sched/fair: Keep a fully_busy SMT sched group as busiest
|
|
|
|
|
|
|
|
When comparing two fully_busy scheduling groups, keep the current busiest
|
|
|
|
group if it represents an SMT core. Tasks in such scheduling group share
|
|
|
|
CPU resources and need more help than tasks in a non-SMT fully_busy group.
|
|
|
|
|
|
|
|
Cc: Ben Segall <bsegall@google.com>
|
|
|
|
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
|
|
|
|
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
|
|
|
|
Cc: Len Brown <len.brown@intel.com>
|
|
|
|
Cc: Mel Gorman <mgorman@suse.de>
|
|
|
|
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
|
|
|
|
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
|
|
|
|
Cc: Steven Rostedt <rostedt@goodmis.org>
|
|
|
|
Cc: Tim C. Chen <tim.c.chen@intel.com>
|
|
|
|
Cc: Valentin Schneider <vschneid@redhat.com>
|
|
|
|
Cc: x86@kernel.org
|
|
|
|
Cc: linux-kernel@vger.kernel.org
|
|
|
|
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
|
|
|
|
Tested-by: Zhang Rui <rui.zhang@intel.com>
|
|
|
|
Patchset: intel-thread-director
|
|
|
|
---
|
|
|
|
kernel/sched/fair.c | 16 ++++++++++++++--
|
|
|
|
1 file changed, 14 insertions(+), 2 deletions(-)
|
|
|
|
|
|
|
|
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
|
2023-03-12 00:55:40 +00:00
|
|
|
index 24183e3eb3d47..30b0e8476d1c6 100644
|
2023-03-06 23:14:39 +00:00
|
|
|
--- a/kernel/sched/fair.c
|
|
|
|
+++ b/kernel/sched/fair.c
|
|
|
|
@@ -9302,10 +9302,22 @@ static bool update_sd_pick_busiest(struct lb_env *env,
|
|
|
|
* contention when accessing shared HW resources.
|
|
|
|
*
|
|
|
|
* XXX for now avg_load is not computed and always 0 so we
|
|
|
|
- * select the 1st one.
|
|
|
|
+ * select the 1st one, except if @sg is composed of SMT
|
|
|
|
+ * siblings.
|
|
|
|
*/
|
|
|
|
- if (sgs->avg_load <= busiest->avg_load)
|
|
|
|
+
|
|
|
|
+ if (sgs->avg_load < busiest->avg_load)
|
|
|
|
return false;
|
|
|
|
+
|
|
|
|
+ if (sgs->avg_load == busiest->avg_load) {
|
|
|
|
+ /*
|
|
|
|
+ * SMT sched groups need more help than non-SMT groups.
|
|
|
|
+ * If @sg happens to also be SMT, either choice is good.
|
|
|
|
+ */
|
|
|
|
+ if (sds->busiest->flags & SD_SHARE_CPUCAPACITY)
|
|
|
|
+ return false;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
break;
|
|
|
|
|
|
|
|
case group_has_spare:
|
|
|
|
--
|
|
|
|
2.39.2
|
|
|
|
|
2023-03-12 00:55:40 +00:00
|
|
|
From a6d2b260d711873add514001a4eca293ff40c860 Mon Sep 17 00:00:00 2001
|
2023-03-06 23:14:39 +00:00
|
|
|
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
|
|
|
|
Date: Mon, 6 Feb 2023 20:58:34 -0800
|
|
|
|
Subject: [PATCH] sched/fair: Use the prefer_sibling flag of the current sched
|
|
|
|
domain
|
|
|
|
|
|
|
|
SD_PREFER_SIBLING is set from the SMT scheduling domain up to the first
|
|
|
|
non-NUMA domain (the exception is systems with SD_ASYM_CPUCAPACITY).
|
|
|
|
|
|
|
|
Above the SMT sched domain, all domains have a child. The SD_PREFER_
|
|
|
|
SIBLING is honored always regardless of the scheduling domain at which the
|
|
|
|
load balance takes place.
|
|
|
|
|
|
|
|
There are cases, however, in which the busiest CPU's sched domain has
|
|
|
|
child but the destination CPU's does not. Consider, for instance a non-SMT
|
|
|
|
core (or an SMT core with only one online sibling) doing load balance with
|
|
|
|
an SMT core at the MC level. SD_PREFER_SIBLING will not be honored. We are
|
|
|
|
left with a fully busy SMT core and an idle non-SMT core.
|
|
|
|
|
|
|
|
Avoid inconsistent behavior. Use the prefer_sibling behavior at the current
|
|
|
|
scheduling domain, not its child.
|
|
|
|
|
|
|
|
The NUMA sched domain does not have the SD_PREFER_SIBLING flag. Thus, we
|
|
|
|
will not spread load among NUMA sched groups, as desired.
|
|
|
|
|
|
|
|
Cc: Ben Segall <bsegall@google.com>
|
|
|
|
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
|
|
|
|
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
|
|
|
|
Cc: Len Brown <len.brown@intel.com>
|
|
|
|
Cc: Mel Gorman <mgorman@suse.de>
|
|
|
|
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
|
|
|
|
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
|
|
|
|
Cc: Steven Rostedt <rostedt@goodmis.org>
|
|
|
|
Cc: Tim C. Chen <tim.c.chen@intel.com>
|
|
|
|
Cc: Valentin Schneider <vschneid@redhat.com>
|
|
|
|
Cc: x86@kernel.org
|
|
|
|
Cc: linux-kernel@vger.kernel.org
|
|
|
|
Suggested-by: Valentin Schneider <vschneid@redhat.com>
|
|
|
|
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
|
|
|
|
Tested-by: Zhang Rui <rui.zhang@intel.com>
|
|
|
|
Patchset: intel-thread-director
|
|
|
|
---
|
|
|
|
kernel/sched/fair.c | 10 +++++-----
|
|
|
|
1 file changed, 5 insertions(+), 5 deletions(-)
|
|
|
|
|
|
|
|
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
|
2023-03-12 00:55:40 +00:00
|
|
|
index 30b0e8476d1c6..9e98cfcf1e48b 100644
|
2023-03-06 23:14:39 +00:00
|
|
|
--- a/kernel/sched/fair.c
|
|
|
|
+++ b/kernel/sched/fair.c
|
|
|
|
@@ -9792,7 +9792,6 @@ static void update_idle_cpu_scan(struct lb_env *env,
|
|
|
|
|
|
|
|
static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sds)
|
|
|
|
{
|
|
|
|
- struct sched_domain *child = env->sd->child;
|
|
|
|
struct sched_group *sg = env->sd->groups;
|
|
|
|
struct sg_lb_stats *local = &sds->local_stat;
|
|
|
|
struct sg_lb_stats tmp_sgs;
|
|
|
|
@@ -9833,9 +9832,11 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
|
|
|
|
sg = sg->next;
|
|
|
|
} while (sg != env->sd->groups);
|
|
|
|
|
|
|
|
- /* Tag domain that child domain prefers tasks go to siblings first */
|
|
|
|
- sds->prefer_sibling = child && child->flags & SD_PREFER_SIBLING;
|
|
|
|
-
|
|
|
|
+ /*
|
|
|
|
+ * Tag domain that @env::sd prefers to spread excess tasks among
|
|
|
|
+ * sibling sched groups.
|
|
|
|
+ */
|
|
|
|
+ sds->prefer_sibling = env->sd->flags & SD_PREFER_SIBLING;
|
|
|
|
|
|
|
|
if (env->sd->flags & SD_NUMA)
|
|
|
|
env->fbq_type = fbq_classify_group(&sds->busiest_stat);
|
|
|
|
@@ -10134,7 +10135,6 @@ static struct sched_group *find_busiest_group(struct lb_env *env)
|
|
|
|
goto out_balanced;
|
|
|
|
}
|
|
|
|
|
|
|
|
- /* Try to move all excess tasks to child's sibling domain */
|
|
|
|
if (sds.prefer_sibling && local->group_type == group_has_spare &&
|
|
|
|
busiest->sum_nr_running > local->sum_nr_running + 1)
|
|
|
|
goto force_balance;
|
|
|
|
--
|
|
|
|
2.39.2
|
|
|
|
|
2023-03-12 00:55:40 +00:00
|
|
|
From 08ff98c145516a1013cc5cc4f1d9b0d36388ec90 Mon Sep 17 00:00:00 2001
|
2023-03-06 23:14:39 +00:00
|
|
|
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
|
|
|
|
Date: Mon, 6 Feb 2023 20:58:35 -0800
|
|
|
|
Subject: [PATCH] sched/fair: Do not even the number of busy CPUs via
|
|
|
|
asym_packing
|
|
|
|
|
|
|
|
Now that find_busiest_group() triggers load balancing between a fully_
|
|
|
|
busy SMT2 core and an idle non-SMT core, it is no longer needed to force
|
|
|
|
balancing via asym_packing. Use asym_packing only as intended: when there
|
|
|
|
is a high-priority CPU that is idle.
|
|
|
|
|
|
|
|
After this change, the same logic applies to SMT and non-SMT local groups.
|
|
|
|
Simplify asym_smt_can_pull_tasks() accordingly.
|
|
|
|
|
|
|
|
Cc: Ben Segall <bsegall@google.com>
|
|
|
|
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
|
|
|
|
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
|
|
|
|
Cc: Len Brown <len.brown@intel.com>
|
|
|
|
Cc: Mel Gorman <mgorman@suse.de>
|
|
|
|
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
|
|
|
|
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
|
|
|
|
Cc: Steven Rostedt <rostedt@goodmis.org>
|
|
|
|
Cc: Tim C. Chen <tim.c.chen@intel.com>
|
|
|
|
Cc: Valentin Schneider <vschneid@redhat.com>
|
|
|
|
Cc: x86@kernel.org
|
|
|
|
Cc: linux-kernel@vger.kernel.org
|
|
|
|
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
|
|
|
|
Tested-by: Zhang Rui <rui.zhang@intel.com>
|
|
|
|
Patchset: intel-thread-director
|
|
|
|
---
|
|
|
|
kernel/sched/fair.c | 37 +++++--------------------------------
|
|
|
|
1 file changed, 5 insertions(+), 32 deletions(-)
|
|
|
|
|
|
|
|
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
|
2023-03-12 00:55:40 +00:00
|
|
|
index 9e98cfcf1e48b..635e8b41a87c9 100644
|
2023-03-06 23:14:39 +00:00
|
|
|
--- a/kernel/sched/fair.c
|
|
|
|
+++ b/kernel/sched/fair.c
|
|
|
|
@@ -9035,20 +9035,15 @@ group_type group_classify(unsigned int imbalance_pct,
|
|
|
|
* @sgs: Load-balancing statistics of the candidate busiest group
|
|
|
|
* @sg: The candidate busiest group
|
|
|
|
*
|
|
|
|
- * Check the state of the SMT siblings of both @sds::local and @sg and decide
|
|
|
|
- * if @dst_cpu can pull tasks.
|
|
|
|
+ * Check the state of the SMT siblings of @sg and decide if @dst_cpu can pull
|
|
|
|
+ * tasks.
|
|
|
|
*
|
|
|
|
* This function must be called only if all the SMT siblings of @dst_cpu are
|
|
|
|
* idle, if any.
|
|
|
|
*
|
|
|
|
- * If @dst_cpu does not have SMT siblings, it can pull tasks if two or more of
|
|
|
|
- * the SMT siblings of @sg are busy. If only one CPU in @sg is busy, pull tasks
|
|
|
|
- * only if @dst_cpu has higher priority.
|
|
|
|
- *
|
|
|
|
- * If @dst_cpu has SMT siblings, decide based on the priority of @sg. Do it only
|
|
|
|
- * if @sg has exactly one busy CPU (i.e., one more than @sds::local). Bigger
|
|
|
|
- * imbalances in the number of busy CPUs will be dealt with in
|
|
|
|
- * find_busiest_group().
|
|
|
|
+ * @dst_cpu can pull tasks if @sg has exactly one busy CPU (i.e., one more than
|
|
|
|
+ * @sds::local) and has lower group priority than @sds::local. Bigger imbalances
|
|
|
|
+ * in the number of busy CPUs will be dealt with in find_busiest_group().
|
|
|
|
*
|
|
|
|
* Return: true if @dst_cpu can pull tasks, false otherwise.
|
|
|
|
*/
|
|
|
|
@@ -9057,33 +9052,11 @@ static bool asym_smt_can_pull_tasks(int dst_cpu, struct sd_lb_stats *sds,
|
|
|
|
struct sched_group *sg)
|
|
|
|
{
|
|
|
|
#ifdef CONFIG_SCHED_SMT
|
|
|
|
- bool local_is_smt;
|
|
|
|
int sg_busy_cpus;
|
|
|
|
|
|
|
|
- local_is_smt = sds->local->flags & SD_SHARE_CPUCAPACITY;
|
|
|
|
sg_busy_cpus = sgs->group_weight - sgs->idle_cpus;
|
|
|
|
|
|
|
|
- if (!local_is_smt) {
|
|
|
|
- /*
|
|
|
|
- * If we are here, @dst_cpu is idle and does not have SMT
|
|
|
|
- * siblings. Pull tasks if candidate group has two or more
|
|
|
|
- * busy CPUs.
|
|
|
|
- */
|
|
|
|
- if (sg_busy_cpus >= 2) /* implies sg_is_smt */
|
|
|
|
- return true;
|
|
|
|
-
|
|
|
|
- /*
|
|
|
|
- * @dst_cpu does not have SMT siblings. @sg may have SMT
|
|
|
|
- * siblings and only one is busy. In such case, @dst_cpu
|
|
|
|
- * can help if it has higher priority and is idle (i.e.,
|
|
|
|
- * it has no running tasks).
|
|
|
|
- */
|
|
|
|
- return sched_asym_prefer(dst_cpu, sg->asym_prefer_cpu);
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
/*
|
|
|
|
- * @dst_cpu has SMT siblings and are also idle.
|
|
|
|
- *
|
|
|
|
* If the difference in the number of busy CPUs is two or more, let
|
|
|
|
* find_busiest_group() take care of it. We only care if @sg has
|
|
|
|
* exactly one busy CPU. This covers SMT and non-SMT sched groups.
|
|
|
|
--
|
|
|
|
2.39.2
|
|
|
|
|
2023-03-12 00:55:40 +00:00
|
|
|
From f70e8d703a3703327fea540de41d6b4df922a85d Mon Sep 17 00:00:00 2001
|
2023-03-06 23:14:39 +00:00
|
|
|
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
|
|
|
|
Date: Mon, 6 Feb 2023 20:58:36 -0800
|
|
|
|
Subject: [PATCH] sched/topology: Remove SHARED_CHILD from ASYM_PACKING
|
|
|
|
|
|
|
|
Only x86 and Power7 use ASYM_PACKING. They use it differently.
|
|
|
|
|
|
|
|
Power7 has cores of equal priority, but the SMT siblings of a core have
|
|
|
|
different priorities. Parent scheduling domains do not need (nor have) the
|
|
|
|
ASYM_PACKING flag. SHARED_CHILD is not needed. Using SHARED_PARENT would
|
|
|
|
cause the topology debug code to complain.
|
|
|
|
|
|
|
|
X86 has cores of different priority, but all the SMT siblings of the core
|
|
|
|
have equal priority. It needs ASYM_PACKING at the MC level, but not at the
|
|
|
|
SMT level (it also needs it at upper levels if they have scheduling groups
|
|
|
|
of different priority). Removing ASYM_PACKING from the SMT domain causes
|
|
|
|
the topology debug code to complain.
|
|
|
|
|
|
|
|
Remove SHARED_CHILD for now. We still need a topology check that satisfies
|
|
|
|
both architectures.
|
|
|
|
|
|
|
|
Cc: Ben Segall <bsegall@google.com>
|
|
|
|
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
|
|
|
|
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
|
|
|
|
Cc: Len Brown <len.brown@intel.com>
|
|
|
|
Cc: Mel Gorman <mgorman@suse.de>
|
|
|
|
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
|
|
|
|
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
|
|
|
|
Cc: Steven Rostedt <rostedt@goodmis.org>
|
|
|
|
Cc: Tim C. Chen <tim.c.chen@intel.com>
|
|
|
|
Cc: Valentin Schneider <vschneid@redhat.com>
|
|
|
|
Cc: x86@kernel.org
|
|
|
|
Cc: linux-kernel@vger.kernel.org
|
|
|
|
Suggested-by: Valentin Schneider <vschneid@redhat.com>
|
|
|
|
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
|
|
|
|
Tested-by: Zhang Rui <rui.zhang@intel.com>
|
|
|
|
Patchset: intel-thread-director
|
|
|
|
---
|
|
|
|
include/linux/sched/sd_flags.h | 5 +----
|
|
|
|
1 file changed, 1 insertion(+), 4 deletions(-)
|
|
|
|
|
|
|
|
diff --git a/include/linux/sched/sd_flags.h b/include/linux/sched/sd_flags.h
|
2023-03-12 00:55:40 +00:00
|
|
|
index 57bde66d95f7a..800238854ba54 100644
|
2023-03-06 23:14:39 +00:00
|
|
|
--- a/include/linux/sched/sd_flags.h
|
|
|
|
+++ b/include/linux/sched/sd_flags.h
|
|
|
|
@@ -132,12 +132,9 @@ SD_FLAG(SD_SERIALIZE, SDF_SHARED_PARENT | SDF_NEEDS_GROUPS)
|
|
|
|
/*
|
|
|
|
* Place busy tasks earlier in the domain
|
|
|
|
*
|
|
|
|
- * SHARED_CHILD: Usually set on the SMT level. Technically could be set further
|
|
|
|
- * up, but currently assumed to be set from the base domain
|
|
|
|
- * upwards (see update_top_cache_domain()).
|
|
|
|
* NEEDS_GROUPS: Load balancing flag.
|
|
|
|
*/
|
|
|
|
-SD_FLAG(SD_ASYM_PACKING, SDF_SHARED_CHILD | SDF_NEEDS_GROUPS)
|
|
|
|
+SD_FLAG(SD_ASYM_PACKING, SDF_NEEDS_GROUPS)
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Prefer to place tasks in a sibling domain
|
|
|
|
--
|
|
|
|
2.39.2
|
|
|
|
|
2023-03-12 00:55:40 +00:00
|
|
|
From fa1585eecfadd43f866f50951bbe106c09e1f79f Mon Sep 17 00:00:00 2001
|
2023-03-06 23:14:39 +00:00
|
|
|
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
|
|
|
|
Date: Mon, 6 Feb 2023 20:58:37 -0800
|
|
|
|
Subject: [PATCH] x86/sched: Remove SD_ASYM_PACKING from the SMT domain flags
|
|
|
|
|
|
|
|
There is no difference between any of the SMT siblings of a physical core.
|
|
|
|
Do not do asym_packing load balancing at this level.
|
|
|
|
|
|
|
|
Cc: Ben Segall <bsegall@google.com>
|
|
|
|
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
|
|
|
|
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
|
|
|
|
Cc: Len Brown <len.brown@intel.com>
|
|
|
|
Cc: Mel Gorman <mgorman@suse.de>
|
|
|
|
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
|
|
|
|
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
|
|
|
|
Cc: Steven Rostedt <rostedt@goodmis.org>
|
|
|
|
Cc: Tim C. Chen <tim.c.chen@intel.com>
|
|
|
|
Cc: Valentin Schneider <vschneid@redhat.com>
|
|
|
|
Cc: x86@kernel.org
|
|
|
|
Cc: linux-kernel@vger.kernel.org
|
|
|
|
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
|
|
|
|
Tested-by: Zhang Rui <rui.zhang@intel.com>
|
|
|
|
Patchset: intel-thread-director
|
|
|
|
---
|
|
|
|
arch/x86/kernel/smpboot.c | 2 +-
|
|
|
|
1 file changed, 1 insertion(+), 1 deletion(-)
|
|
|
|
|
|
|
|
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
|
2023-03-12 00:55:40 +00:00
|
|
|
index 3f3ea0287f694..c3de98224cb4f 100644
|
2023-03-06 23:14:39 +00:00
|
|
|
--- a/arch/x86/kernel/smpboot.c
|
|
|
|
+++ b/arch/x86/kernel/smpboot.c
|
|
|
|
@@ -545,7 +545,7 @@ static int x86_core_flags(void)
|
|
|
|
#ifdef CONFIG_SCHED_SMT
|
|
|
|
static int x86_smt_flags(void)
|
|
|
|
{
|
|
|
|
- return cpu_smt_flags() | x86_sched_itmt_flags();
|
|
|
|
+ return cpu_smt_flags();
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
#ifdef CONFIG_SCHED_CLUSTER
|
|
|
|
--
|
|
|
|
2.39.2
|
|
|
|
|
2023-03-12 00:55:40 +00:00
|
|
|
From 5f0150f15f343e543bc257bca1db30f3481ed474 Mon Sep 17 00:00:00 2001
|
2023-03-06 23:14:39 +00:00
|
|
|
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
|
|
|
|
Date: Mon, 6 Feb 2023 20:58:38 -0800
|
|
|
|
Subject: [PATCH] x86/sched/itmt: Give all SMT siblings of a core the same
|
|
|
|
priority
|
|
|
|
|
|
|
|
X86 does not have the SD_ASYM_PACKING flag in the SMT domain. The scheduler
|
|
|
|
knows how to handle SMT and non-SMT cores of different priority. There is
|
|
|
|
no reason for SMT siblings of a core to have different priorities.
|
|
|
|
|
|
|
|
Cc: Ben Segall <bsegall@google.com>
|
|
|
|
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
|
|
|
|
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
|
|
|
|
Cc: Len Brown <len.brown@intel.com>
|
|
|
|
Cc: Mel Gorman <mgorman@suse.de>
|
|
|
|
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
|
|
|
|
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
|
|
|
|
Cc: Steven Rostedt <rostedt@goodmis.org>
|
|
|
|
Cc: Tim C. Chen <tim.c.chen@intel.com>
|
|
|
|
Cc: Valentin Schneider <vschneid@redhat.com>
|
|
|
|
Cc: x86@kernel.org
|
|
|
|
Cc: linux-kernel@vger.kernel.org
|
|
|
|
Reviewed-by: Len Brown <len.brown@intel.com>
|
|
|
|
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
|
|
|
|
Tested-by: Zhang Rui <rui.zhang@intel.com>
|
|
|
|
Patchset: intel-thread-director
|
|
|
|
---
|
|
|
|
arch/x86/kernel/itmt.c | 23 +++++------------------
|
|
|
|
1 file changed, 5 insertions(+), 18 deletions(-)
|
|
|
|
|
|
|
|
diff --git a/arch/x86/kernel/itmt.c b/arch/x86/kernel/itmt.c
|
2023-03-12 00:55:40 +00:00
|
|
|
index 9ff480e94511b..6510883c5e817 100644
|
2023-03-06 23:14:39 +00:00
|
|
|
--- a/arch/x86/kernel/itmt.c
|
|
|
|
+++ b/arch/x86/kernel/itmt.c
|
|
|
|
@@ -174,32 +174,19 @@ int arch_asym_cpu_priority(int cpu)
|
|
|
|
|
|
|
|
/**
|
|
|
|
* sched_set_itmt_core_prio() - Set CPU priority based on ITMT
|
|
|
|
- * @prio: Priority of cpu core
|
|
|
|
- * @core_cpu: The cpu number associated with the core
|
|
|
|
+ * @prio: Priority of @cpu
|
|
|
|
+ * @cpu: The CPU number
|
|
|
|
*
|
|
|
|
* The pstate driver will find out the max boost frequency
|
|
|
|
* and call this function to set a priority proportional
|
|
|
|
- * to the max boost frequency. CPU with higher boost
|
|
|
|
+ * to the max boost frequency. CPUs with higher boost
|
|
|
|
* frequency will receive higher priority.
|
|
|
|
*
|
|
|
|
* No need to rebuild sched domain after updating
|
|
|
|
* the CPU priorities. The sched domains have no
|
|
|
|
* dependency on CPU priorities.
|
|
|
|
*/
|
|
|
|
-void sched_set_itmt_core_prio(int prio, int core_cpu)
|
|
|
|
+void sched_set_itmt_core_prio(int prio, int cpu)
|
|
|
|
{
|
|
|
|
- int cpu, i = 1;
|
|
|
|
-
|
|
|
|
- for_each_cpu(cpu, topology_sibling_cpumask(core_cpu)) {
|
|
|
|
- int smt_prio;
|
|
|
|
-
|
|
|
|
- /*
|
|
|
|
- * Ensure that the siblings are moved to the end
|
|
|
|
- * of the priority chain and only used when
|
|
|
|
- * all other high priority cpus are out of capacity.
|
|
|
|
- */
|
|
|
|
- smt_prio = prio * smp_num_siblings / (i * i);
|
|
|
|
- per_cpu(sched_core_priority, cpu) = smt_prio;
|
|
|
|
- i++;
|
|
|
|
- }
|
|
|
|
+ per_cpu(sched_core_priority, cpu) = prio;
|
|
|
|
}
|
|
|
|
--
|
|
|
|
2.39.2
|
|
|
|
|
2023-03-12 00:55:40 +00:00
|
|
|
From 59c32bf713f2d1c52fedb38ceeef07375736dbbc Mon Sep 17 00:00:00 2001
|
2023-03-06 23:14:39 +00:00
|
|
|
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
|
|
|
|
Date: Mon, 6 Feb 2023 21:10:42 -0800
|
|
|
|
Subject: [PATCH] sched/task_struct: Introduce IPC classes of tasks
|
|
|
|
|
|
|
|
On hybrid processors, the architecture differences between the types of
|
|
|
|
CPUs lead to different instructions-per-cycle (IPC) on each type of CPU.
|
|
|
|
IPCs may differ further by the type of instructions. Instructions can be
|
|
|
|
grouped into classes of similar IPCs.
|
|
|
|
|
|
|
|
Hence, tasks can be classified into groups based on the type of
|
|
|
|
instructions they execute.
|
|
|
|
|
|
|
|
Add a new member task_struct::ipcc to associate a particular task to
|
|
|
|
an IPC class that depends on the instructions it executes.
|
|
|
|
|
|
|
|
The scheduler may use the IPC class of a task and data about the
|
|
|
|
performance among CPUs of a given IPC class to improve throughput. It
|
|
|
|
may, for instance, place certain classes of tasks on CPUs of higher
|
|
|
|
performance.
|
|
|
|
|
|
|
|
The methods to determine the classification of a task and its relative
|
|
|
|
IPC score are specific to each CPU architecture.
|
|
|
|
|
|
|
|
Cc: Ben Segall <bsegall@google.com>
|
|
|
|
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
|
|
|
|
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
|
|
|
|
Cc: Ionela Voinescu <ionela.voinescu@arm.com>
|
|
|
|
Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
|
|
|
|
Cc: Len Brown <len.brown@intel.com>
|
|
|
|
Cc: Lukasz Luba <lukasz.luba@arm.com>
|
|
|
|
Cc: Mel Gorman <mgorman@suse.de>
|
|
|
|
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
|
|
|
|
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
|
|
|
|
Cc: Steven Rostedt <rostedt@goodmis.org>
|
|
|
|
Cc: Tim C. Chen <tim.c.chen@intel.com>
|
|
|
|
Cc: Valentin Schneider <vschneid@redhat.com>
|
|
|
|
Cc: x86@kernel.org
|
|
|
|
Cc: linux-pm@vger.kernel.org
|
|
|
|
Cc: linux-kernel@vger.kernel.org
|
|
|
|
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
|
|
|
|
Patchset: intel-thread-director
|
|
|
|
---
|
|
|
|
include/linux/sched.h | 10 ++++++++++
|
|
|
|
init/Kconfig | 12 ++++++++++++
|
|
|
|
2 files changed, 22 insertions(+)
|
|
|
|
|
|
|
|
diff --git a/include/linux/sched.h b/include/linux/sched.h
|
2023-03-12 00:55:40 +00:00
|
|
|
index ffb6eb55cd135..ca0c32bf796fb 100644
|
2023-03-06 23:14:39 +00:00
|
|
|
--- a/include/linux/sched.h
|
|
|
|
+++ b/include/linux/sched.h
|
|
|
|
@@ -127,6 +127,8 @@ struct task_group;
|
|
|
|
__TASK_TRACED | EXIT_DEAD | EXIT_ZOMBIE | \
|
|
|
|
TASK_PARKED)
|
|
|
|
|
|
|
|
+#define IPC_CLASS_UNCLASSIFIED 0
|
|
|
|
+
|
|
|
|
#define task_is_running(task) (READ_ONCE((task)->__state) == TASK_RUNNING)
|
|
|
|
|
|
|
|
#define task_is_traced(task) ((READ_ONCE(task->jobctl) & JOBCTL_TRACED) != 0)
|
|
|
|
@@ -1528,6 +1530,14 @@ struct task_struct {
|
|
|
|
union rv_task_monitor rv[RV_PER_TASK_MONITORS];
|
|
|
|
#endif
|
|
|
|
|
|
|
|
+#ifdef CONFIG_IPC_CLASSES
|
|
|
|
+ /*
|
|
|
|
+ * A hardware-defined classification of task that reflects but is
|
|
|
|
+ * not identical to the number of instructions per cycle.
|
|
|
|
+ */
|
|
|
|
+ unsigned short ipcc;
|
|
|
|
+#endif
|
|
|
|
+
|
|
|
|
/*
|
|
|
|
* New fields for task_struct should be added above here, so that
|
|
|
|
* they are included in the randomized portion of task_struct.
|
|
|
|
diff --git a/init/Kconfig b/init/Kconfig
|
2023-03-12 00:55:40 +00:00
|
|
|
index 0c214af99085d..0ddda55fde6a6 100644
|
2023-03-06 23:14:39 +00:00
|
|
|
--- a/init/Kconfig
|
|
|
|
+++ b/init/Kconfig
|
|
|
|
@@ -865,6 +865,18 @@ config UCLAMP_BUCKETS_COUNT
|
|
|
|
|
|
|
|
If in doubt, use the default value.
|
|
|
|
|
|
|
|
+config IPC_CLASSES
|
|
|
|
+ bool "IPC classes of tasks"
|
|
|
|
+ depends on SMP
|
|
|
|
+ help
|
|
|
|
+ If selected, each task is assigned a classification value that
|
|
|
|
+ reflects the type of instructions that the task executes. This
|
|
|
|
+ classification reflects but is not equal to the number of
|
|
|
|
+ instructions retired per cycle.
|
|
|
|
+
|
|
|
|
+ The scheduler uses the classification value to improve the placement
|
|
|
|
+ of tasks.
|
|
|
|
+
|
|
|
|
endmenu
|
|
|
|
|
|
|
|
#
|
|
|
|
--
|
|
|
|
2.39.2
|
|
|
|
|
2023-03-12 00:55:40 +00:00
|
|
|
From c4738d1d773b0e4066f30875c6393adb77d55837 Mon Sep 17 00:00:00 2001
|
2023-03-06 23:14:39 +00:00
|
|
|
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
|
|
|
|
Date: Mon, 6 Feb 2023 21:10:43 -0800
|
|
|
|
Subject: [PATCH] sched: Add interfaces for IPC classes
|
|
|
|
|
|
|
|
Add the interfaces that architectures shall implement to convey the data
|
|
|
|
to support IPC classes.
|
|
|
|
|
|
|
|
arch_update_ipcc() updates the IPC classification of the current task as
|
|
|
|
given by hardware.
|
|
|
|
|
|
|
|
arch_get_ipcc_score() provides a performance score for a given IPC class
|
|
|
|
when placed on a specific CPU. Higher scores indicate higher performance.
|
|
|
|
|
|
|
|
When a driver or equivalent enablement code has configured the necessary
|
|
|
|
hardware to support IPC classes, it should call sched_enable_ipc_classes()
|
|
|
|
to notify the scheduler that it can start using IPC classes data.
|
|
|
|
|
|
|
|
The number of classes and the score of each class of task are determined
|
|
|
|
by hardware.
|
|
|
|
|
|
|
|
Cc: Ben Segall <bsegall@google.com>
|
|
|
|
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
|
|
|
|
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
|
|
|
|
Cc: Ionela Voinescu <ionela.voinescu@arm.com>
|
|
|
|
Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
|
|
|
|
Cc: Len Brown <len.brown@intel.com>
|
|
|
|
Cc: Lukasz Luba <lukasz.luba@arm.com>
|
|
|
|
Cc: Mel Gorman <mgorman@suse.de>
|
|
|
|
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
|
|
|
|
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
|
|
|
|
Cc: Steven Rostedt <rostedt@goodmis.org>
|
|
|
|
Cc: Tim C. Chen <tim.c.chen@intel.com>
|
|
|
|
Cc: Valentin Schneider <vschneid@redhat.com>
|
|
|
|
Cc: x86@kernel.org
|
|
|
|
Cc: linux-pm@vger.kernel.org
|
|
|
|
Cc: linux-kernel@vger.kernel.org
|
|
|
|
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
|
|
|
|
Patchset: intel-thread-director
|
|
|
|
---
|
|
|
|
include/linux/sched/topology.h | 6 ++++
|
|
|
|
kernel/sched/sched.h | 66 ++++++++++++++++++++++++++++++++++
|
|
|
|
kernel/sched/topology.c | 9 +++++
|
|
|
|
3 files changed, 81 insertions(+)
|
|
|
|
|
|
|
|
diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
|
2023-03-12 00:55:40 +00:00
|
|
|
index 816df6cc444e1..5b084d3c9ad12 100644
|
2023-03-06 23:14:39 +00:00
|
|
|
--- a/include/linux/sched/topology.h
|
|
|
|
+++ b/include/linux/sched/topology.h
|
|
|
|
@@ -280,4 +280,10 @@ static inline int task_node(const struct task_struct *p)
|
|
|
|
return cpu_to_node(task_cpu(p));
|
|
|
|
}
|
|
|
|
|
|
|
|
+#ifdef CONFIG_IPC_CLASSES
|
|
|
|
+extern void sched_enable_ipc_classes(void);
|
|
|
|
+#else
|
|
|
|
+static inline void sched_enable_ipc_classes(void) { }
|
|
|
|
+#endif
|
|
|
|
+
|
|
|
|
#endif /* _LINUX_SCHED_TOPOLOGY_H */
|
|
|
|
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
|
2023-03-12 00:55:40 +00:00
|
|
|
index d6d488e8eb554..a3b2b66e077d9 100644
|
2023-03-06 23:14:39 +00:00
|
|
|
--- a/kernel/sched/sched.h
|
|
|
|
+++ b/kernel/sched/sched.h
|
|
|
|
@@ -2511,6 +2511,72 @@ void arch_scale_freq_tick(void)
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
+#ifdef CONFIG_IPC_CLASSES
|
|
|
|
+DECLARE_STATIC_KEY_FALSE(sched_ipcc);
|
|
|
|
+
|
|
|
|
+static inline bool sched_ipcc_enabled(void)
|
|
|
|
+{
|
|
|
|
+ return static_branch_unlikely(&sched_ipcc);
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+#ifndef arch_update_ipcc
|
|
|
|
+/**
|
|
|
|
+ * arch_update_ipcc() - Update the IPC class of the current task
|
|
|
|
+ * @curr: The current task
|
|
|
|
+ *
|
|
|
|
+ * Request that the IPC classification of @curr is updated.
|
|
|
|
+ *
|
|
|
|
+ * Returns: none
|
|
|
|
+ */
|
|
|
|
+static __always_inline
|
|
|
|
+void arch_update_ipcc(struct task_struct *curr)
|
|
|
|
+{
|
|
|
|
+}
|
|
|
|
+#endif
|
|
|
|
+
|
|
|
|
+#ifndef arch_get_ipcc_score
|
|
|
|
+
|
|
|
|
+#define SCHED_IPCC_SCORE_SCALE (1L << SCHED_FIXEDPOINT_SHIFT)
|
|
|
|
+/**
|
|
|
|
+ * arch_get_ipcc_score() - Get the IPC score of a class of task
|
|
|
|
+ * @ipcc: The IPC class
|
|
|
|
+ * @cpu: A CPU number
|
|
|
|
+ *
|
|
|
|
+ * The IPC performance scores reflects (but it is not identical to) the number
|
|
|
|
+ * of instructions retired per cycle for a given IPC class. It is a linear and
|
|
|
|
+ * abstract metric. Higher scores reflect better performance.
|
|
|
|
+ *
|
|
|
|
+ * The IPC score can be normalized with respect to the class, i, with the
|
|
|
|
+ * highest IPC score on the CPU, c, with highest performance:
|
|
|
|
+ *
|
|
|
|
+ * IPC(i, c)
|
|
|
|
+ * ------------------------------------ * SCHED_IPCC_SCORE_SCALE
|
|
|
|
+ * max(IPC(i, c) : (i, c))
|
|
|
|
+ *
|
|
|
|
+ * Scheduling schemes that want to use the IPC score along with other
|
|
|
|
+ * normalized metrics for scheduling (e.g., CPU capacity) may need to normalize
|
|
|
|
+ * it.
|
|
|
|
+ *
|
|
|
|
+ * Other scheduling schemes (e.g., asym_packing) do not need normalization.
|
|
|
|
+ *
|
|
|
|
+ * Returns the performance score of an IPC class, @ipcc, when running on @cpu.
|
|
|
|
+ * Error when either @ipcc or @cpu are invalid.
|
|
|
|
+ */
|
|
|
|
+static __always_inline
|
|
|
|
+unsigned long arch_get_ipcc_score(unsigned short ipcc, int cpu)
|
|
|
|
+{
|
|
|
|
+ return SCHED_IPCC_SCORE_SCALE;
|
|
|
|
+}
|
|
|
|
+#endif
|
|
|
|
+#else /* CONFIG_IPC_CLASSES */
|
|
|
|
+
|
|
|
|
+#define arch_get_ipcc_score(ipcc, cpu) (-EINVAL)
|
|
|
|
+#define arch_update_ipcc(curr)
|
|
|
|
+
|
|
|
|
+static inline bool sched_ipcc_enabled(void) { return false; }
|
|
|
|
+
|
|
|
|
+#endif /* CONFIG_IPC_CLASSES */
|
|
|
|
+
|
|
|
|
#ifndef arch_scale_freq_capacity
|
|
|
|
/**
|
|
|
|
* arch_scale_freq_capacity - get the frequency scale factor of a given CPU.
|
|
|
|
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
|
2023-03-12 00:55:40 +00:00
|
|
|
index 8739c2a5a54ea..60e03d15f58ca 100644
|
2023-03-06 23:14:39 +00:00
|
|
|
--- a/kernel/sched/topology.c
|
|
|
|
+++ b/kernel/sched/topology.c
|
|
|
|
@@ -670,6 +670,15 @@ DEFINE_PER_CPU(struct sched_domain __rcu *, sd_asym_packing);
|
|
|
|
DEFINE_PER_CPU(struct sched_domain __rcu *, sd_asym_cpucapacity);
|
|
|
|
DEFINE_STATIC_KEY_FALSE(sched_asym_cpucapacity);
|
|
|
|
|
|
|
|
+#ifdef CONFIG_IPC_CLASSES
|
|
|
|
+DEFINE_STATIC_KEY_FALSE(sched_ipcc);
|
|
|
|
+
|
|
|
|
+void sched_enable_ipc_classes(void)
|
|
|
|
+{
|
|
|
|
+ static_branch_enable_cpuslocked(&sched_ipcc);
|
|
|
|
+}
|
|
|
|
+#endif
|
|
|
|
+
|
|
|
|
static void update_top_cache_domain(int cpu)
|
|
|
|
{
|
|
|
|
struct sched_domain_shared *sds = NULL;
|
|
|
|
--
|
|
|
|
2.39.2
|
|
|
|
|
2023-03-12 00:55:40 +00:00
|
|
|
From 021e8196d4ef87aa80fae8024b8055a6bca5b9c4 Mon Sep 17 00:00:00 2001
|
2023-03-06 23:14:39 +00:00
|
|
|
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
|
|
|
|
Date: Mon, 6 Feb 2023 21:10:44 -0800
|
|
|
|
Subject: [PATCH] sched/core: Initialize the IPC class of a new task
|
|
|
|
|
|
|
|
New tasks shall start life as unclassified. They will be classified by
|
|
|
|
hardware when they run.
|
|
|
|
|
|
|
|
Cc: Ben Segall <bsegall@google.com>
|
|
|
|
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
|
|
|
|
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
|
|
|
|
Cc: Ionela Voinescu <ionela.voinescu@arm.com>
|
|
|
|
Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
|
|
|
|
Cc: Len Brown <len.brown@intel.com>
|
|
|
|
Cc: Lukasz Luba <lukasz.luba@arm.com>
|
|
|
|
Cc: Mel Gorman <mgorman@suse.de>
|
|
|
|
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
|
|
|
|
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
|
|
|
|
Cc: Steven Rostedt <rostedt@goodmis.org>
|
|
|
|
Cc: Tim C. Chen <tim.c.chen@intel.com>
|
|
|
|
Cc: Valentin Schneider <vschneid@redhat.com>
|
|
|
|
Cc: x86@kernel.org
|
|
|
|
Cc: linux-pm@vger.kernel.org
|
|
|
|
Cc: linux-kernel@vger.kernel.org
|
|
|
|
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
|
|
|
|
Patchset: intel-thread-director
|
|
|
|
---
|
|
|
|
kernel/sched/core.c | 3 +++
|
|
|
|
1 file changed, 3 insertions(+)
|
|
|
|
|
|
|
|
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
|
2023-03-12 00:55:40 +00:00
|
|
|
index f730b6fe94a7f..7b63bf90430bb 100644
|
2023-03-06 23:14:39 +00:00
|
|
|
--- a/kernel/sched/core.c
|
|
|
|
+++ b/kernel/sched/core.c
|
|
|
|
@@ -4377,6 +4377,9 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
|
|
|
|
p->se.prev_sum_exec_runtime = 0;
|
|
|
|
p->se.nr_migrations = 0;
|
|
|
|
p->se.vruntime = 0;
|
|
|
|
+#ifdef CONFIG_IPC_CLASSES
|
|
|
|
+ p->ipcc = IPC_CLASS_UNCLASSIFIED;
|
|
|
|
+#endif
|
|
|
|
INIT_LIST_HEAD(&p->se.group_node);
|
|
|
|
|
|
|
|
#ifdef CONFIG_FAIR_GROUP_SCHED
|
|
|
|
--
|
|
|
|
2.39.2
|
|
|
|
|
2023-03-12 00:55:40 +00:00
|
|
|
From ef293bd0404885577a038daf0c011a57eeece3de Mon Sep 17 00:00:00 2001
|
2023-03-06 23:14:39 +00:00
|
|
|
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
|
|
|
|
Date: Mon, 6 Feb 2023 21:10:45 -0800
|
|
|
|
Subject: [PATCH] sched/core: Add user_tick as argument to scheduler_tick()
|
|
|
|
|
|
|
|
Differentiate between user and kernel ticks so that the scheduler updates
|
|
|
|
the IPC class of the current task during the former.
|
|
|
|
|
|
|
|
Cc: Ben Segall <bsegall@google.com>
|
|
|
|
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
|
|
|
|
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
|
|
|
|
Cc: Ionela Voinescu <ionela.voinescu@arm.com>
|
|
|
|
Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
|
|
|
|
Cc: Len Brown <len.brown@intel.com>
|
|
|
|
Cc: Lukasz Luba <lukasz.luba@arm.com>
|
|
|
|
Cc: Mel Gorman <mgorman@suse.de>
|
|
|
|
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
|
|
|
|
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
|
|
|
|
Cc: Steven Rostedt <rostedt@goodmis.org>
|
|
|
|
Cc: Tim C. Chen <tim.c.chen@intel.com>
|
|
|
|
Cc: Valentin Schneider <vschneid@redhat.com>
|
|
|
|
Cc: x86@kernel.org
|
|
|
|
Cc: linux-pm@vger.kernel.org
|
|
|
|
Cc: linux-kernel@vger.kernel.org
|
|
|
|
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
|
|
|
|
Patchset: intel-thread-director
|
|
|
|
---
|
|
|
|
include/linux/sched.h | 2 +-
|
|
|
|
kernel/sched/core.c | 2 +-
|
|
|
|
kernel/time/timer.c | 2 +-
|
|
|
|
3 files changed, 3 insertions(+), 3 deletions(-)
|
|
|
|
|
|
|
|
diff --git a/include/linux/sched.h b/include/linux/sched.h
|
2023-03-12 00:55:40 +00:00
|
|
|
index ca0c32bf796fb..e58dc7503864c 100644
|
2023-03-06 23:14:39 +00:00
|
|
|
--- a/include/linux/sched.h
|
|
|
|
+++ b/include/linux/sched.h
|
|
|
|
@@ -293,7 +293,7 @@ enum {
|
|
|
|
TASK_COMM_LEN = 16,
|
|
|
|
};
|
|
|
|
|
|
|
|
-extern void scheduler_tick(void);
|
|
|
|
+extern void scheduler_tick(bool user_tick);
|
|
|
|
|
|
|
|
#define MAX_SCHEDULE_TIMEOUT LONG_MAX
|
|
|
|
|
|
|
|
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
|
2023-03-12 00:55:40 +00:00
|
|
|
index 7b63bf90430bb..0a8558421eba2 100644
|
2023-03-06 23:14:39 +00:00
|
|
|
--- a/kernel/sched/core.c
|
|
|
|
+++ b/kernel/sched/core.c
|
|
|
|
@@ -5492,7 +5492,7 @@ static inline u64 cpu_resched_latency(struct rq *rq) { return 0; }
|
|
|
|
* This function gets called by the timer code, with HZ frequency.
|
|
|
|
* We call it with interrupts disabled.
|
|
|
|
*/
|
|
|
|
-void scheduler_tick(void)
|
|
|
|
+void scheduler_tick(bool user_tick)
|
|
|
|
{
|
|
|
|
int cpu = smp_processor_id();
|
|
|
|
struct rq *rq = cpu_rq(cpu);
|
|
|
|
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
|
2023-03-12 00:55:40 +00:00
|
|
|
index 717fcb9fb14aa..b444b6f5f585b 100644
|
2023-03-06 23:14:39 +00:00
|
|
|
--- a/kernel/time/timer.c
|
|
|
|
+++ b/kernel/time/timer.c
|
|
|
|
@@ -1841,7 +1841,7 @@ void update_process_times(int user_tick)
|
|
|
|
if (in_irq())
|
|
|
|
irq_work_tick();
|
|
|
|
#endif
|
|
|
|
- scheduler_tick();
|
|
|
|
+ scheduler_tick(user_tick);
|
|
|
|
if (IS_ENABLED(CONFIG_POSIX_TIMERS))
|
|
|
|
run_posix_cpu_timers();
|
|
|
|
}
|
|
|
|
--
|
|
|
|
2.39.2
|
|
|
|
|
2023-03-12 00:55:40 +00:00
|
|
|
From 45a0511c0378f765cc26fb1faaddc3080ab2e179 Mon Sep 17 00:00:00 2001
|
2023-03-06 23:14:39 +00:00
|
|
|
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
|
|
|
|
Date: Mon, 6 Feb 2023 21:10:46 -0800
|
|
|
|
Subject: [PATCH] sched/core: Update the IPC class of the current task
|
|
|
|
|
|
|
|
When supported, hardware monitors the instruction stream to classify the
|
|
|
|
current task. Hence, at userspace tick, we are ready to read the most
|
|
|
|
recent classification result for the current task.
|
|
|
|
|
|
|
|
Cc: Ben Segall <bsegall@google.com>
|
|
|
|
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
|
|
|
|
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
|
|
|
|
Cc: Ionela Voinescu <ionela.voinescu@arm.com>
|
|
|
|
Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
|
|
|
|
Cc: Len Brown <len.brown@intel.com>
|
|
|
|
Cc: Lukasz Luba <lukasz.luba@arm.com>
|
|
|
|
Cc: Mel Gorman <mgorman@suse.de>
|
|
|
|
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
|
|
|
|
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
|
|
|
|
Cc: Steven Rostedt <rostedt@goodmis.org>
|
|
|
|
Cc: Tim C. Chen <tim.c.chen@intel.com>
|
|
|
|
Cc: Valentin Schneider <vschneid@redhat.com>
|
|
|
|
Cc: x86@kernel.org
|
|
|
|
Cc: linux-pm@vger.kernel.org
|
|
|
|
Cc: linux-kernel@vger.kernel.org
|
|
|
|
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
|
|
|
|
Patchset: intel-thread-director
|
|
|
|
---
|
|
|
|
kernel/sched/core.c | 3 +++
|
|
|
|
1 file changed, 3 insertions(+)
|
|
|
|
|
|
|
|
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
|
2023-03-12 00:55:40 +00:00
|
|
|
index 0a8558421eba2..4782b1359eb89 100644
|
2023-03-06 23:14:39 +00:00
|
|
|
--- a/kernel/sched/core.c
|
|
|
|
+++ b/kernel/sched/core.c
|
|
|
|
@@ -5504,6 +5504,9 @@ void scheduler_tick(bool user_tick)
|
|
|
|
if (housekeeping_cpu(cpu, HK_TYPE_TICK))
|
|
|
|
arch_scale_freq_tick();
|
|
|
|
|
|
|
|
+ if (sched_ipcc_enabled() && user_tick)
|
|
|
|
+ arch_update_ipcc(curr);
|
|
|
|
+
|
|
|
|
sched_clock_tick();
|
|
|
|
|
|
|
|
rq_lock(rq, &rf);
|
|
|
|
--
|
|
|
|
2.39.2
|
|
|
|
|
2023-03-12 00:55:40 +00:00
|
|
|
From b88b78dd46cf545386f984768597179451f2bdf0 Mon Sep 17 00:00:00 2001
|
2023-03-06 23:14:39 +00:00
|
|
|
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
|
|
|
|
Date: Mon, 6 Feb 2023 21:10:47 -0800
|
|
|
|
Subject: [PATCH] sched/fair: Collect load-balancing stats for IPC classes
|
|
|
|
|
|
|
|
When selecting a busiest scheduling group, the IPC class of the current
|
|
|
|
task can be used to select between two scheduling groups of types asym_
|
|
|
|
packing or fully_busy that are otherwise identical.
|
|
|
|
|
|
|
|
Compute the IPC class performance score for a scheduling group. It
|
|
|
|
is the sum of the scores of the current tasks of all the runqueues.
|
|
|
|
|
|
|
|
Also, keep track of the class of the task with the lowest IPC class score
|
|
|
|
in the scheduling group.
|
|
|
|
|
|
|
|
These two metrics will be used during idle load balancing to compute the
|
|
|
|
current and the prospective IPC class score of a scheduling group.
|
|
|
|
|
|
|
|
Cc: Ben Segall <bsegall@google.com>
|
|
|
|
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
|
|
|
|
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
|
|
|
|
Cc: Ionela Voinescu <ionela.voinescu@arm.com>
|
|
|
|
Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
|
|
|
|
Cc: Len Brown <len.brown@intel.com>
|
|
|
|
Cc: Lukasz Luba <lukasz.luba@arm.com>
|
|
|
|
Cc: Mel Gorman <mgorman@suse.de>
|
|
|
|
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
|
|
|
|
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
|
|
|
|
Cc: Steven Rostedt <rostedt@goodmis.org>
|
|
|
|
Cc: Tim C. Chen <tim.c.chen@intel.com>
|
|
|
|
Cc: Valentin Schneider <vschneid@redhat.com>
|
|
|
|
Cc: x86@kernel.org
|
|
|
|
Cc: linux-pm@vger.kernel.org
|
|
|
|
Cc: linux-kernel@vger.kernel.org
|
|
|
|
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
|
|
|
|
Patchset: intel-thread-director
|
|
|
|
---
|
|
|
|
kernel/sched/fair.c | 61 +++++++++++++++++++++++++++++++++++++++++++++
|
|
|
|
1 file changed, 61 insertions(+)
|
|
|
|
|
|
|
|
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
|
2023-03-12 00:55:40 +00:00
|
|
|
index 635e8b41a87c9..86f779d9d2a90 100644
|
2023-03-06 23:14:39 +00:00
|
|
|
--- a/kernel/sched/fair.c
|
|
|
|
+++ b/kernel/sched/fair.c
|
|
|
|
@@ -8751,6 +8751,11 @@ struct sg_lb_stats {
|
|
|
|
unsigned int nr_numa_running;
|
|
|
|
unsigned int nr_preferred_running;
|
|
|
|
#endif
|
|
|
|
+#ifdef CONFIG_IPC_CLASSES
|
|
|
|
+ unsigned long min_score; /* Min(score(rq->curr->ipcc)) */
|
|
|
|
+ unsigned short min_ipcc; /* Class of the task with the minimum IPCC score in the rq */
|
|
|
|
+ unsigned long sum_score; /* Sum(score(rq->curr->ipcc)) */
|
|
|
|
+#endif
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
|
|
|
|
@@ -9028,6 +9033,59 @@ group_type group_classify(unsigned int imbalance_pct,
|
|
|
|
return group_has_spare;
|
|
|
|
}
|
|
|
|
|
|
|
|
+#ifdef CONFIG_IPC_CLASSES
|
|
|
|
+static void init_rq_ipcc_stats(struct sg_lb_stats *sgs)
|
|
|
|
+{
|
|
|
|
+ /* All IPCC stats have been set to zero in update_sg_lb_stats(). */
|
|
|
|
+ sgs->min_score = ULONG_MAX;
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+/* Called only if cpu_of(@rq) is not idle and has tasks running. */
|
|
|
|
+static void update_sg_lb_ipcc_stats(int dst_cpu, struct sg_lb_stats *sgs,
|
|
|
|
+ struct rq *rq)
|
|
|
|
+{
|
|
|
|
+ struct task_struct *curr;
|
|
|
|
+ unsigned short ipcc;
|
|
|
|
+ unsigned long score;
|
|
|
|
+
|
|
|
|
+ if (!sched_ipcc_enabled())
|
|
|
|
+ return;
|
|
|
|
+
|
|
|
|
+ curr = rcu_dereference(rq->curr);
|
|
|
|
+ if (!curr || (curr->flags & PF_EXITING) || is_idle_task(curr) ||
|
|
|
|
+ task_is_realtime(curr) ||
|
|
|
|
+ !cpumask_test_cpu(dst_cpu, curr->cpus_ptr))
|
|
|
|
+ return;
|
|
|
|
+
|
|
|
|
+ ipcc = curr->ipcc;
|
|
|
|
+ score = arch_get_ipcc_score(ipcc, cpu_of(rq));
|
|
|
|
+
|
|
|
|
+ /*
|
|
|
|
+ * Ignore tasks with invalid scores. When finding the busiest group, we
|
|
|
|
+ * prefer those with higher sum_score. This group will not be selected.
|
|
|
|
+ */
|
|
|
|
+ if (IS_ERR_VALUE(score))
|
|
|
|
+ return;
|
|
|
|
+
|
|
|
|
+ sgs->sum_score += score;
|
|
|
|
+
|
|
|
|
+ if (score < sgs->min_score) {
|
|
|
|
+ sgs->min_score = score;
|
|
|
|
+ sgs->min_ipcc = ipcc;
|
|
|
|
+ }
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+#else /* CONFIG_IPC_CLASSES */
|
|
|
|
+static void update_sg_lb_ipcc_stats(int dst_cpu, struct sg_lb_stats *sgs,
|
|
|
|
+ struct rq *rq)
|
|
|
|
+{
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+static void init_rq_ipcc_stats(struct sg_lb_stats *sgs)
|
|
|
|
+{
|
|
|
|
+}
|
|
|
|
+#endif /* CONFIG_IPC_CLASSES */
|
|
|
|
+
|
|
|
|
/**
|
|
|
|
* asym_smt_can_pull_tasks - Check whether the load balancing CPU can pull tasks
|
|
|
|
* @dst_cpu: Destination CPU of the load balancing
|
|
|
|
@@ -9120,6 +9178,7 @@ static inline void update_sg_lb_stats(struct lb_env *env,
|
|
|
|
int i, nr_running, local_group;
|
|
|
|
|
|
|
|
memset(sgs, 0, sizeof(*sgs));
|
|
|
|
+ init_rq_ipcc_stats(sgs);
|
|
|
|
|
|
|
|
local_group = group == sds->local;
|
|
|
|
|
|
|
|
@@ -9169,6 +9228,8 @@ static inline void update_sg_lb_stats(struct lb_env *env,
|
|
|
|
if (sgs->group_misfit_task_load < load)
|
|
|
|
sgs->group_misfit_task_load = load;
|
|
|
|
}
|
|
|
|
+
|
|
|
|
+ update_sg_lb_ipcc_stats(env->dst_cpu, sgs, rq);
|
|
|
|
}
|
|
|
|
|
|
|
|
sgs->group_capacity = group->sgc->capacity;
|
|
|
|
--
|
|
|
|
2.39.2
|
|
|
|
|
2023-03-12 00:55:40 +00:00
|
|
|
From ba3c46567c032ee843d8ab5f576ddc78df1a56bf Mon Sep 17 00:00:00 2001
|
2023-03-06 23:14:39 +00:00
|
|
|
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
|
|
|
|
Date: Mon, 6 Feb 2023 21:10:48 -0800
|
|
|
|
Subject: [PATCH] sched/fair: Compute IPC class scores for load balancing
|
|
|
|
|
|
|
|
Compute the joint total (both current and prospective) IPC class score of
|
|
|
|
a scheduling group and the local scheduling group.
|
|
|
|
|
|
|
|
These IPCC statistics are used during idle load balancing. The candidate
|
|
|
|
scheduling group will have one fewer busy CPU after load balancing. This
|
|
|
|
observation is important for cores with SMT support.
|
|
|
|
|
|
|
|
The IPCC score of scheduling groups composed of SMT siblings needs to
|
|
|
|
consider that the siblings share CPU resources. When computing the total
|
|
|
|
IPCC score of the scheduling group, divide score of each sibling by the
|
|
|
|
number of busy siblings.
|
|
|
|
|
|
|
|
Collect IPCC statistics for asym_packing and fully_busy scheduling groups.
|
|
|
|
When picking a busiest group, they are used to break ties between otherwise
|
|
|
|
identical groups.
|
|
|
|
|
|
|
|
Cc: Ben Segall <bsegall@google.com>
|
|
|
|
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
|
|
|
|
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
|
|
|
|
Cc: Ionela Voinescu <ionela.voinescu@arm.com>
|
|
|
|
Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
|
|
|
|
Cc: Len Brown <len.brown@intel.com>
|
|
|
|
Cc: Lukasz Luba <lukasz.luba@arm.com>
|
|
|
|
Cc: Mel Gorman <mgorman@suse.de>
|
|
|
|
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
|
|
|
|
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
|
|
|
|
Cc: Steven Rostedt <rostedt@goodmis.org>
|
|
|
|
Cc: Tim C. Chen <tim.c.chen@intel.com>
|
|
|
|
Cc: Valentin Schneider <vschneid@redhat.com>
|
|
|
|
Cc: x86@kernel.org
|
|
|
|
Cc: linux-pm@vger.kernel.org
|
|
|
|
Cc: linux-kernel@vger.kernel.org
|
|
|
|
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
|
|
|
|
Patchset: intel-thread-director
|
|
|
|
---
|
|
|
|
kernel/sched/fair.c | 68 +++++++++++++++++++++++++++++++++++++++++++++
|
|
|
|
1 file changed, 68 insertions(+)
|
|
|
|
|
|
|
|
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
|
2023-03-12 00:55:40 +00:00
|
|
|
index 86f779d9d2a90..3b84fb72891bc 100644
|
2023-03-06 23:14:39 +00:00
|
|
|
--- a/kernel/sched/fair.c
|
|
|
|
+++ b/kernel/sched/fair.c
|
|
|
|
@@ -8755,6 +8755,8 @@ struct sg_lb_stats {
|
|
|
|
unsigned long min_score; /* Min(score(rq->curr->ipcc)) */
|
|
|
|
unsigned short min_ipcc; /* Class of the task with the minimum IPCC score in the rq */
|
|
|
|
unsigned long sum_score; /* Sum(score(rq->curr->ipcc)) */
|
|
|
|
+ long ipcc_score_after; /* Prospective IPCC score after load balancing */
|
|
|
|
+ unsigned long ipcc_score_before; /* IPCC score before load balancing */
|
|
|
|
#endif
|
|
|
|
};
|
|
|
|
|
|
|
|
@@ -9075,6 +9077,62 @@ static void update_sg_lb_ipcc_stats(int dst_cpu, struct sg_lb_stats *sgs,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
+static void update_sg_lb_stats_scores(struct sg_lb_stats *sgs,
|
|
|
|
+ struct sched_group *sg,
|
|
|
|
+ struct lb_env *env)
|
|
|
|
+{
|
|
|
|
+ unsigned long score_on_dst_cpu, before;
|
|
|
|
+ int busy_cpus;
|
|
|
|
+ long after;
|
|
|
|
+
|
|
|
|
+ if (!sched_ipcc_enabled())
|
|
|
|
+ return;
|
|
|
|
+
|
|
|
|
+ /*
|
|
|
|
+ * IPCC scores are only useful during idle load balancing. For now,
|
|
|
|
+ * only asym_packing uses IPCC scores.
|
|
|
|
+ */
|
|
|
|
+ if (!(env->sd->flags & SD_ASYM_PACKING) ||
|
|
|
|
+ env->idle == CPU_NOT_IDLE)
|
|
|
|
+ return;
|
|
|
|
+
|
|
|
|
+ /*
|
|
|
|
+ * IPCC scores are used to break ties only between these types of
|
|
|
|
+ * groups.
|
|
|
|
+ */
|
|
|
|
+ if (sgs->group_type != group_fully_busy &&
|
|
|
|
+ sgs->group_type != group_asym_packing)
|
|
|
|
+ return;
|
|
|
|
+
|
|
|
|
+ busy_cpus = sgs->group_weight - sgs->idle_cpus;
|
|
|
|
+
|
|
|
|
+ /* No busy CPUs in the group. No tasks to move. */
|
|
|
|
+ if (!busy_cpus)
|
|
|
|
+ return;
|
|
|
|
+
|
|
|
|
+ score_on_dst_cpu = arch_get_ipcc_score(sgs->min_ipcc, env->dst_cpu);
|
|
|
|
+
|
|
|
|
+ /*
|
|
|
|
+ * Do not use IPC scores. sgs::ipcc_score_{after, before} will be zero
|
|
|
|
+ * and not used.
|
|
|
|
+ */
|
|
|
|
+ if (IS_ERR_VALUE(score_on_dst_cpu))
|
|
|
|
+ return;
|
|
|
|
+
|
|
|
|
+ before = sgs->sum_score;
|
|
|
|
+ after = before - sgs->min_score;
|
|
|
|
+
|
|
|
|
+ /* SMT siblings share throughput. */
|
|
|
|
+ if (busy_cpus > 1 && sg->flags & SD_SHARE_CPUCAPACITY) {
|
|
|
|
+ before /= busy_cpus;
|
|
|
|
+ /* One sibling will become idle after load balance. */
|
|
|
|
+ after /= busy_cpus - 1;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ sgs->ipcc_score_after = after + score_on_dst_cpu;
|
|
|
|
+ sgs->ipcc_score_before = before;
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
#else /* CONFIG_IPC_CLASSES */
|
|
|
|
static void update_sg_lb_ipcc_stats(int dst_cpu, struct sg_lb_stats *sgs,
|
|
|
|
struct rq *rq)
|
|
|
|
@@ -9084,6 +9142,13 @@ static void update_sg_lb_ipcc_stats(int dst_cpu, struct sg_lb_stats *sgs,
|
|
|
|
static void init_rq_ipcc_stats(struct sg_lb_stats *sgs)
|
|
|
|
{
|
|
|
|
}
|
|
|
|
+
|
|
|
|
+static void update_sg_lb_stats_scores(struct sg_lb_stats *sgs,
|
|
|
|
+ struct sched_group *sg,
|
|
|
|
+ struct lb_env *env)
|
|
|
|
+{
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
#endif /* CONFIG_IPC_CLASSES */
|
|
|
|
|
|
|
|
/**
|
|
|
|
@@ -9245,6 +9310,9 @@ static inline void update_sg_lb_stats(struct lb_env *env,
|
|
|
|
|
|
|
|
sgs->group_type = group_classify(env->sd->imbalance_pct, group, sgs);
|
|
|
|
|
|
|
|
+ if (!local_group)
|
|
|
|
+ update_sg_lb_stats_scores(sgs, group, env);
|
|
|
|
+
|
|
|
|
/* Computing avg_load makes sense only when group is overloaded */
|
|
|
|
if (sgs->group_type == group_overloaded)
|
|
|
|
sgs->avg_load = (sgs->group_load * SCHED_CAPACITY_SCALE) /
|
|
|
|
--
|
|
|
|
2.39.2
|
|
|
|
|
2023-03-12 00:55:40 +00:00
|
|
|
From 98664eb2ae97aaf9b290077ef9e6629a7c1d7d79 Mon Sep 17 00:00:00 2001
|
2023-03-06 23:14:39 +00:00
|
|
|
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
|
|
|
|
Date: Mon, 6 Feb 2023 21:10:49 -0800
|
|
|
|
Subject: [PATCH] sched/fair: Use IPCC stats to break ties between asym_packing
|
|
|
|
sched groups
|
|
|
|
|
|
|
|
As it iterates, update_sd_pick_busiest() keeps on selecting as busiest
|
|
|
|
sched groups of identical priority. Since both groups have the same
|
|
|
|
priority, either group is a good choice. The IPCC statistics provide a
|
|
|
|
measure of the throughput before and after load balance. Use them to
|
|
|
|
pick a busiest scheduling group from otherwise identical asym_packing
|
|
|
|
scheduling groups.
|
|
|
|
|
|
|
|
Pick as busiest the scheduling group that yields a higher IPCC score
|
|
|
|
after load balancing.
|
|
|
|
|
|
|
|
Cc: Ben Segall <bsegall@google.com>
|
|
|
|
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
|
|
|
|
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
|
|
|
|
Cc: Ionela Voinescu <ionela.voinescu@arm.com>
|
|
|
|
Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
|
|
|
|
Cc: Len Brown <len.brown@intel.com>
|
|
|
|
Cc: Lukasz Luba <lukasz.luba@arm.com>
|
|
|
|
Cc: Mel Gorman <mgorman@suse.de>
|
|
|
|
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
|
|
|
|
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
|
|
|
|
Cc: Steven Rostedt <rostedt@goodmis.org>
|
|
|
|
Cc: Tim C. Chen <tim.c.chen@intel.com>
|
|
|
|
Cc: Valentin Schneider <vschneid@redhat.com>
|
|
|
|
Cc: x86@kernel.org
|
|
|
|
Cc: linux-pm@vger.kernel.org
|
|
|
|
Cc: linux-kernel@vger.kernel.org
|
|
|
|
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
|
|
|
|
Patchset: intel-thread-director
|
|
|
|
---
|
|
|
|
kernel/sched/fair.c | 72 +++++++++++++++++++++++++++++++++++++++++++++
|
|
|
|
1 file changed, 72 insertions(+)
|
|
|
|
|
|
|
|
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
|
2023-03-12 00:55:40 +00:00
|
|
|
index 3b84fb72891bc..89a13ae0185e6 100644
|
2023-03-06 23:14:39 +00:00
|
|
|
--- a/kernel/sched/fair.c
|
|
|
|
+++ b/kernel/sched/fair.c
|
|
|
|
@@ -9133,6 +9133,60 @@ static void update_sg_lb_stats_scores(struct sg_lb_stats *sgs,
|
|
|
|
sgs->ipcc_score_before = before;
|
|
|
|
}
|
|
|
|
|
|
|
|
+/**
|
|
|
|
+ * sched_asym_ipcc_prefer - Select a sched group based on its IPCC score
|
|
|
|
+ * @a: Load balancing statistics of a sched group
|
|
|
|
+ * @b: Load balancing statistics of a second sched group
|
|
|
|
+ *
|
|
|
|
+ * Returns: true if @a has a higher IPCC score than @b after load balance.
|
|
|
|
+ * False otherwise.
|
|
|
|
+ */
|
|
|
|
+static bool sched_asym_ipcc_prefer(struct sg_lb_stats *a,
|
|
|
|
+ struct sg_lb_stats *b)
|
|
|
|
+{
|
|
|
|
+ if (!sched_ipcc_enabled())
|
|
|
|
+ return false;
|
|
|
|
+
|
|
|
|
+ /* @a increases overall throughput after load balance. */
|
|
|
|
+ if (a->ipcc_score_after > b->ipcc_score_after)
|
|
|
|
+ return true;
|
|
|
|
+
|
|
|
|
+ /*
|
|
|
|
+ * If @a and @b yield the same overall throughput, pick @a if
|
|
|
|
+ * its current throughput is lower than that of @b.
|
|
|
|
+ */
|
|
|
|
+ if (a->ipcc_score_after == b->ipcc_score_after)
|
|
|
|
+ return a->ipcc_score_before < b->ipcc_score_before;
|
|
|
|
+
|
|
|
|
+ return false;
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+/**
|
|
|
|
+ * sched_asym_ipcc_pick - Select a sched group based on its IPCC score
|
|
|
|
+ * @a: A scheduling group
|
|
|
|
+ * @b: A second scheduling group
|
|
|
|
+ * @a_stats: Load balancing statistics of @a
|
|
|
|
+ * @b_stats: Load balancing statistics of @b
|
|
|
|
+ *
|
|
|
|
+ * Returns: true if @a has the same priority and @a has tasks with IPC classes
|
|
|
|
+ * that yield higher overall throughput after load balance. False otherwise.
|
|
|
|
+ */
|
|
|
|
+static bool sched_asym_ipcc_pick(struct sched_group *a,
|
|
|
|
+ struct sched_group *b,
|
|
|
|
+ struct sg_lb_stats *a_stats,
|
|
|
|
+ struct sg_lb_stats *b_stats)
|
|
|
|
+{
|
|
|
|
+ /*
|
|
|
|
+ * Only use the class-specific preference selection if both sched
|
|
|
|
+ * groups have the same priority.
|
|
|
|
+ */
|
|
|
|
+ if (arch_asym_cpu_priority(a->asym_prefer_cpu) !=
|
|
|
|
+ arch_asym_cpu_priority(b->asym_prefer_cpu))
|
|
|
|
+ return false;
|
|
|
|
+
|
|
|
|
+ return sched_asym_ipcc_prefer(a_stats, b_stats);
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
#else /* CONFIG_IPC_CLASSES */
|
|
|
|
static void update_sg_lb_ipcc_stats(int dst_cpu, struct sg_lb_stats *sgs,
|
|
|
|
struct rq *rq)
|
|
|
|
@@ -9149,6 +9203,14 @@ static void update_sg_lb_stats_scores(struct sg_lb_stats *sgs,
|
|
|
|
{
|
|
|
|
}
|
|
|
|
|
|
|
|
+static bool sched_asym_ipcc_pick(struct sched_group *a,
|
|
|
|
+ struct sched_group *b,
|
|
|
|
+ struct sg_lb_stats *a_stats,
|
|
|
|
+ struct sg_lb_stats *b_stats)
|
|
|
|
+{
|
|
|
|
+ return false;
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
#endif /* CONFIG_IPC_CLASSES */
|
|
|
|
|
|
|
|
/**
|
|
|
|
@@ -9384,6 +9446,16 @@ static bool update_sd_pick_busiest(struct lb_env *env,
|
|
|
|
/* Prefer to move from lowest priority CPU's work */
|
|
|
|
if (sched_asym_prefer(sg->asym_prefer_cpu, sds->busiest->asym_prefer_cpu))
|
|
|
|
return false;
|
|
|
|
+
|
|
|
|
+ /*
|
|
|
|
+ * Unlike other callers of sched_asym_prefer(), here both @sg
|
|
|
|
+ * and @sds::busiest have tasks running. When they have equal
|
|
|
|
+ * priority, their IPC class scores can be used to select a
|
|
|
|
+ * better busiest.
|
|
|
|
+ */
|
|
|
|
+ if (sched_asym_ipcc_pick(sds->busiest, sg, &sds->busiest_stat, sgs))
|
|
|
|
+ return false;
|
|
|
|
+
|
|
|
|
break;
|
|
|
|
|
|
|
|
case group_misfit_task:
|
|
|
|
--
|
|
|
|
2.39.2
|
|
|
|
|
2023-03-12 00:55:40 +00:00
|
|
|
From a4b0646c23237cc67703ed40bdde243d78bb7910 Mon Sep 17 00:00:00 2001
|
2023-03-06 23:14:39 +00:00
|
|
|
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
|
|
|
|
Date: Mon, 6 Feb 2023 21:10:50 -0800
|
|
|
|
Subject: [PATCH] sched/fair: Use IPCC stats to break ties between fully_busy
|
|
|
|
SMT groups
|
|
|
|
|
|
|
|
IPCC statistics are used during idle load balancing. After balancing one
|
|
|
|
of the siblings of an SMT core will become idle. The rest of the busy
|
|
|
|
siblings will enjoy increased throughput. The IPCC statistics provide
|
|
|
|
a measure of the increased throughput. Use them to pick a busiest group
|
|
|
|
from otherwise identical fully_busy scheduling groups (of which the
|
|
|
|
avg_load is equal - and zero).
|
|
|
|
|
|
|
|
Using IPCC scores to break ties with non-SMT fully_busy sched groups
|
|
|
|
is not necessary. SMT sched groups always need more help.
|
|
|
|
|
|
|
|
Add a stub sched_asym_ipcc_prefer() for !CONFIG_IPC_CLASSES.
|
|
|
|
|
|
|
|
Cc: Ben Segall <bsegall@google.com>
|
|
|
|
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
|
|
|
|
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
|
|
|
|
Cc: Ionela Voinescu <ionela.voinescu@arm.com>
|
|
|
|
Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
|
|
|
|
Cc: Len Brown <len.brown@intel.com>
|
|
|
|
Cc: Lukasz Luba <lukasz.luba@arm.com>
|
|
|
|
Cc: Mel Gorman <mgorman@suse.de>
|
|
|
|
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
|
|
|
|
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
|
|
|
|
Cc: Steven Rostedt <rostedt@goodmis.org>
|
|
|
|
Cc: Tim C. Chen <tim.c.chen@intel.com>
|
|
|
|
Cc: Valentin Schneider <vschneid@redhat.com>
|
|
|
|
Cc: x86@kernel.org
|
|
|
|
Cc: linux-pm@vger.kernel.org
|
|
|
|
Cc: linux-kernel@vger.kernel.org
|
|
|
|
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
|
|
|
|
Patchset: intel-thread-director
|
|
|
|
---
|
|
|
|
kernel/sched/fair.c | 23 ++++++++++++++++++++---
|
|
|
|
1 file changed, 20 insertions(+), 3 deletions(-)
|
|
|
|
|
|
|
|
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
|
2023-03-12 00:55:40 +00:00
|
|
|
index 89a13ae0185e6..4f75e9964e8ca 100644
|
2023-03-06 23:14:39 +00:00
|
|
|
--- a/kernel/sched/fair.c
|
|
|
|
+++ b/kernel/sched/fair.c
|
|
|
|
@@ -9203,6 +9203,12 @@ static void update_sg_lb_stats_scores(struct sg_lb_stats *sgs,
|
|
|
|
{
|
|
|
|
}
|
|
|
|
|
|
|
|
+static bool sched_asym_ipcc_prefer(struct sg_lb_stats *a,
|
|
|
|
+ struct sg_lb_stats *b)
|
|
|
|
+{
|
|
|
|
+ return false;
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
static bool sched_asym_ipcc_pick(struct sched_group *a,
|
|
|
|
struct sched_group *b,
|
|
|
|
struct sg_lb_stats *a_stats,
|
|
|
|
@@ -9486,10 +9492,21 @@ static bool update_sd_pick_busiest(struct lb_env *env,
|
|
|
|
if (sgs->avg_load == busiest->avg_load) {
|
|
|
|
/*
|
|
|
|
* SMT sched groups need more help than non-SMT groups.
|
|
|
|
- * If @sg happens to also be SMT, either choice is good.
|
|
|
|
*/
|
|
|
|
- if (sds->busiest->flags & SD_SHARE_CPUCAPACITY)
|
|
|
|
- return false;
|
|
|
|
+ if (sds->busiest->flags & SD_SHARE_CPUCAPACITY) {
|
|
|
|
+ if (!(sg->flags & SD_SHARE_CPUCAPACITY))
|
|
|
|
+ return false;
|
|
|
|
+
|
|
|
|
+ /*
|
|
|
|
+ * Between two SMT groups, use IPCC scores to pick the
|
|
|
|
+ * one that would improve throughput the most (only
|
|
|
|
+ * asym_packing uses IPCC scores for now).
|
|
|
|
+ */
|
|
|
|
+ if (sched_ipcc_enabled() &&
|
|
|
|
+ env->sd->flags & SD_ASYM_PACKING &&
|
|
|
|
+ sched_asym_ipcc_prefer(busiest, sgs))
|
|
|
|
+ return false;
|
|
|
|
+ }
|
|
|
|
}
|
|
|
|
|
|
|
|
break;
|
|
|
|
--
|
|
|
|
2.39.2
|
|
|
|
|
2023-03-12 00:55:40 +00:00
|
|
|
From cc96548bd32f891b0cdf162cdde5f8ca2ba98404 Mon Sep 17 00:00:00 2001
|
2023-03-06 23:14:39 +00:00
|
|
|
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
|
|
|
|
Date: Mon, 6 Feb 2023 21:10:51 -0800
|
|
|
|
Subject: [PATCH] sched/fair: Use IPCC scores to select a busiest runqueue
|
|
|
|
|
|
|
|
For two runqueues of equal priority and equal number of running of tasks,
|
|
|
|
select the one whose current task would have the highest IPC class score
|
|
|
|
if placed on the destination CPU.
|
|
|
|
|
|
|
|
For now, use IPCC scores only for scheduling domains with the
|
|
|
|
SD_ASYM_PACKING flag.
|
|
|
|
|
|
|
|
Cc: Ben Segall <bsegall@google.com>
|
|
|
|
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
|
|
|
|
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
|
|
|
|
Cc: Ionela Voinescu <ionela.voinescu@arm.com>
|
|
|
|
Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
|
|
|
|
Cc: Len Brown <len.brown@intel.com>
|
|
|
|
Cc: Lukasz Luba <lukasz.luba@arm.com>
|
|
|
|
Cc: Mel Gorman <mgorman@suse.de>
|
|
|
|
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
|
|
|
|
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
|
|
|
|
Cc: Steven Rostedt <rostedt@goodmis.org>
|
|
|
|
Cc: Tim C. Chen <tim.c.chen@intel.com>
|
|
|
|
Cc: Valentin Schneider <vschneid@redhat.com>
|
|
|
|
Cc: x86@kernel.org
|
|
|
|
Cc: linux-pm@vger.kernel.org
|
|
|
|
Cc: linux-kernel@vger.kernel.org
|
|
|
|
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
|
|
|
|
Patchset: intel-thread-director
|
|
|
|
---
|
|
|
|
kernel/sched/fair.c | 64 +++++++++++++++++++++++++++++++++++++++++++++
|
|
|
|
1 file changed, 64 insertions(+)
|
|
|
|
|
|
|
|
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
|
2023-03-12 00:55:40 +00:00
|
|
|
index 4f75e9964e8ca..fc42b58f1ba42 100644
|
2023-03-06 23:14:39 +00:00
|
|
|
--- a/kernel/sched/fair.c
|
|
|
|
+++ b/kernel/sched/fair.c
|
|
|
|
@@ -9187,6 +9187,37 @@ static bool sched_asym_ipcc_pick(struct sched_group *a,
|
|
|
|
return sched_asym_ipcc_prefer(a_stats, b_stats);
|
|
|
|
}
|
|
|
|
|
|
|
|
+/**
|
|
|
|
+ * ipcc_score_delta - Get the IPCC score delta wrt the load balance's dst_cpu
|
|
|
|
+ * @p: A task
|
|
|
|
+ * @env: Load balancing environment
|
|
|
|
+ *
|
|
|
|
+ * Returns: The IPCC score delta that @p would get if placed in the destination
|
|
|
|
+ * CPU of @env. LONG_MIN to indicate that the delta should not be used.
|
|
|
|
+ */
|
|
|
|
+static long ipcc_score_delta(struct task_struct *p, struct lb_env *env)
|
|
|
|
+{
|
|
|
|
+ unsigned long score_src, score_dst;
|
|
|
|
+ unsigned short ipcc = p->ipcc;
|
|
|
|
+
|
|
|
|
+ if (!sched_ipcc_enabled())
|
|
|
|
+ return LONG_MIN;
|
|
|
|
+
|
|
|
|
+ /* Only asym_packing uses IPCC scores at the moment. */
|
|
|
|
+ if (!(env->sd->flags & SD_ASYM_PACKING))
|
|
|
|
+ return LONG_MIN;
|
|
|
|
+
|
|
|
|
+ score_dst = arch_get_ipcc_score(ipcc, env->dst_cpu);
|
|
|
|
+ if (IS_ERR_VALUE(score_dst))
|
|
|
|
+ return LONG_MIN;
|
|
|
|
+
|
|
|
|
+ score_src = arch_get_ipcc_score(ipcc, task_cpu(p));
|
|
|
|
+ if (IS_ERR_VALUE(score_src))
|
|
|
|
+ return LONG_MIN;
|
|
|
|
+
|
|
|
|
+ return score_dst - score_src;
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
#else /* CONFIG_IPC_CLASSES */
|
|
|
|
static void update_sg_lb_ipcc_stats(int dst_cpu, struct sg_lb_stats *sgs,
|
|
|
|
struct rq *rq)
|
|
|
|
@@ -9217,6 +9248,11 @@ static bool sched_asym_ipcc_pick(struct sched_group *a,
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
+static long ipcc_score_delta(struct task_struct *p, struct lb_env *env)
|
|
|
|
+{
|
|
|
|
+ return LONG_MIN;
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
#endif /* CONFIG_IPC_CLASSES */
|
|
|
|
|
|
|
|
/**
|
|
|
|
@@ -10377,6 +10413,7 @@ static struct rq *find_busiest_queue(struct lb_env *env,
|
|
|
|
{
|
|
|
|
struct rq *busiest = NULL, *rq;
|
|
|
|
unsigned long busiest_util = 0, busiest_load = 0, busiest_capacity = 1;
|
|
|
|
+ long busiest_ipcc_delta = LONG_MIN;
|
|
|
|
unsigned int busiest_nr = 0;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
@@ -10493,8 +10530,35 @@ static struct rq *find_busiest_queue(struct lb_env *env,
|
|
|
|
|
|
|
|
case migrate_task:
|
|
|
|
if (busiest_nr < nr_running) {
|
|
|
|
+ struct task_struct *curr;
|
|
|
|
+
|
|
|
|
busiest_nr = nr_running;
|
|
|
|
busiest = rq;
|
|
|
|
+
|
|
|
|
+ /*
|
|
|
|
+ * Remember the IPCC score delta of busiest::curr.
|
|
|
|
+ * We may need it to break a tie with other queues
|
|
|
|
+ * with equal nr_running.
|
|
|
|
+ */
|
|
|
|
+ curr = rcu_dereference(busiest->curr);
|
|
|
|
+ busiest_ipcc_delta = ipcc_score_delta(curr, env);
|
|
|
|
+ /*
|
|
|
|
+ * If rq and busiest have the same number of running
|
|
|
|
+ * tasks and IPC classes are supported, pick rq if doing
|
|
|
|
+ * so would give rq::curr a bigger IPC boost on dst_cpu.
|
|
|
|
+ */
|
|
|
|
+ } else if (busiest_nr == nr_running) {
|
|
|
|
+ struct task_struct *curr;
|
|
|
|
+ long delta;
|
|
|
|
+
|
|
|
|
+ curr = rcu_dereference(rq->curr);
|
|
|
|
+ delta = ipcc_score_delta(curr, env);
|
|
|
|
+
|
|
|
|
+ if (busiest_ipcc_delta < delta) {
|
|
|
|
+ busiest_ipcc_delta = delta;
|
|
|
|
+ busiest_nr = nr_running;
|
|
|
|
+ busiest = rq;
|
|
|
|
+ }
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
|
|
|
--
|
|
|
|
2.39.2
|
|
|
|
|
2023-03-12 00:55:40 +00:00
|
|
|
From 8435406d47c8fb7c349f7615ebb1cd0a812afc90 Mon Sep 17 00:00:00 2001
|
2023-03-06 23:14:39 +00:00
|
|
|
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
|
|
|
|
Date: Mon, 6 Feb 2023 21:10:52 -0800
|
|
|
|
Subject: [PATCH] thermal: intel: hfi: Introduce Intel Thread Director classes
|
|
|
|
|
|
|
|
On Intel hybrid parts, each type of CPU has specific performance and
|
|
|
|
energy efficiency capabilities. The Intel Thread Director technology
|
|
|
|
extends the Hardware Feedback Interface (HFI) to provide performance and
|
|
|
|
energy efficiency data for advanced classes of instructions.
|
|
|
|
|
|
|
|
Add support to parse per-class capabilities.
|
|
|
|
|
|
|
|
Cc: Ben Segall <bsegall@google.com>
|
|
|
|
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
|
|
|
|
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
|
|
|
|
Cc: Ionela Voinescu <ionela.voinescu@arm.com>
|
|
|
|
Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
|
|
|
|
Cc: Len Brown <len.brown@intel.com>
|
|
|
|
Cc: Lukasz Luba <lukasz.luba@arm.com>
|
|
|
|
Cc: Mel Gorman <mgorman@suse.de>
|
|
|
|
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
|
|
|
|
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
|
|
|
|
Cc: Steven Rostedt <rostedt@goodmis.org>
|
|
|
|
Cc: Tim C. Chen <tim.c.chen@intel.com>
|
|
|
|
Cc: Valentin Schneider <vschneid@redhat.com>
|
|
|
|
Cc: x86@kernel.org
|
|
|
|
Cc: linux-pm@vger.kernel.org
|
|
|
|
Cc: linux-kernel@vger.kernel.org
|
|
|
|
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
|
|
|
|
Patchset: intel-thread-director
|
|
|
|
---
|
|
|
|
drivers/thermal/intel/intel_hfi.c | 30 ++++++++++++++++++++++++------
|
|
|
|
1 file changed, 24 insertions(+), 6 deletions(-)
|
|
|
|
|
|
|
|
diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c
|
2023-03-12 00:55:40 +00:00
|
|
|
index 239afe02e5182..ae7eec197f680 100644
|
2023-03-06 23:14:39 +00:00
|
|
|
--- a/drivers/thermal/intel/intel_hfi.c
|
|
|
|
+++ b/drivers/thermal/intel/intel_hfi.c
|
|
|
|
@@ -79,7 +79,7 @@ union cpuid6_edx {
|
|
|
|
* @ee_cap: Energy efficiency capability
|
|
|
|
*
|
|
|
|
* Capabilities of a logical processor in the HFI table. These capabilities are
|
|
|
|
- * unitless.
|
|
|
|
+ * unitless and specific to each HFI class.
|
|
|
|
*/
|
|
|
|
struct hfi_cpu_data {
|
|
|
|
u8 perf_cap;
|
|
|
|
@@ -91,7 +91,8 @@ struct hfi_cpu_data {
|
|
|
|
* @perf_updated: Hardware updated performance capabilities
|
|
|
|
* @ee_updated: Hardware updated energy efficiency capabilities
|
|
|
|
*
|
|
|
|
- * Properties of the data in an HFI table.
|
|
|
|
+ * Properties of the data in an HFI table. There exists one header per each
|
|
|
|
+ * HFI class.
|
|
|
|
*/
|
|
|
|
struct hfi_hdr {
|
|
|
|
u8 perf_updated;
|
|
|
|
@@ -129,16 +130,21 @@ struct hfi_instance {
|
|
|
|
|
|
|
|
/**
|
|
|
|
* struct hfi_features - Supported HFI features
|
|
|
|
+ * @nr_classes: Number of classes supported
|
|
|
|
* @nr_table_pages: Size of the HFI table in 4KB pages
|
|
|
|
* @cpu_stride: Stride size to locate the capability data of a logical
|
|
|
|
* processor within the table (i.e., row stride)
|
|
|
|
+ * @class_stride: Stride size to locate a class within the capability
|
|
|
|
+ * data of a logical processor or the HFI table header
|
|
|
|
* @hdr_size: Size of the table header
|
|
|
|
*
|
|
|
|
* Parameters and supported features that are common to all HFI instances
|
|
|
|
*/
|
|
|
|
struct hfi_features {
|
|
|
|
+ unsigned int nr_classes;
|
|
|
|
size_t nr_table_pages;
|
|
|
|
unsigned int cpu_stride;
|
|
|
|
+ unsigned int class_stride;
|
|
|
|
unsigned int hdr_size;
|
|
|
|
};
|
|
|
|
|
|
|
|
@@ -325,8 +331,8 @@ static void init_hfi_cpu_index(struct hfi_cpu_info *info)
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
- * The format of the HFI table depends on the number of capabilities that the
|
|
|
|
- * hardware supports. Keep a data structure to navigate the table.
|
|
|
|
+ * The format of the HFI table depends on the number of capabilities and classes
|
|
|
|
+ * that the hardware supports. Keep a data structure to navigate the table.
|
|
|
|
*/
|
|
|
|
static void init_hfi_instance(struct hfi_instance *hfi_instance)
|
|
|
|
{
|
|
|
|
@@ -507,18 +513,30 @@ static __init int hfi_parse_features(void)
|
|
|
|
/* The number of 4KB pages required by the table */
|
|
|
|
hfi_features.nr_table_pages = edx.split.table_pages + 1;
|
|
|
|
|
|
|
|
+ /*
|
|
|
|
+ * Capability fields of an HFI class are grouped together. Classes are
|
|
|
|
+ * contiguous in memory. Hence, use the number of supported features to
|
|
|
|
+ * locate a specific class.
|
|
|
|
+ */
|
|
|
|
+ hfi_features.class_stride = nr_capabilities;
|
|
|
|
+
|
|
|
|
+ /* For now, use only one class of the HFI table */
|
|
|
|
+ hfi_features.nr_classes = 1;
|
|
|
|
+
|
|
|
|
/*
|
|
|
|
* The header contains change indications for each supported feature.
|
|
|
|
* The size of the table header is rounded up to be a multiple of 8
|
|
|
|
* bytes.
|
|
|
|
*/
|
|
|
|
- hfi_features.hdr_size = DIV_ROUND_UP(nr_capabilities, 8) * 8;
|
|
|
|
+ hfi_features.hdr_size = DIV_ROUND_UP(nr_capabilities *
|
|
|
|
+ hfi_features.nr_classes, 8) * 8;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Data of each logical processor is also rounded up to be a multiple
|
|
|
|
* of 8 bytes.
|
|
|
|
*/
|
|
|
|
- hfi_features.cpu_stride = DIV_ROUND_UP(nr_capabilities, 8) * 8;
|
|
|
|
+ hfi_features.cpu_stride = DIV_ROUND_UP(nr_capabilities *
|
|
|
|
+ hfi_features.nr_classes, 8) * 8;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
--
|
|
|
|
2.39.2
|
|
|
|
|
2023-03-12 00:55:40 +00:00
|
|
|
From 78b14aa7d839c69d3073e92c54445a085b22df4f Mon Sep 17 00:00:00 2001
|
2023-03-06 23:14:39 +00:00
|
|
|
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
|
|
|
|
Date: Mon, 6 Feb 2023 21:10:53 -0800
|
|
|
|
Subject: [PATCH] x86/cpufeatures: Add the Intel Thread Director feature
|
|
|
|
definitions
|
|
|
|
|
|
|
|
Intel Thread Director (ITD) provides hardware resources to classify
|
|
|
|
the current task. The classification reflects the type of instructions that
|
|
|
|
a task currently executes.
|
|
|
|
|
|
|
|
ITD extends the Hardware Feedback Interface table to provide performance
|
|
|
|
and energy efficiency capabilities for each of the supported classes of
|
|
|
|
tasks.
|
|
|
|
|
|
|
|
Cc: Ben Segall <bsegall@google.com>
|
|
|
|
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
|
|
|
|
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
|
|
|
|
Cc: Ionela Voinescu <ionela.voinescu@arm.com>
|
|
|
|
Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
|
|
|
|
Cc: Len Brown <len.brown@intel.com>
|
|
|
|
Cc: Lukasz Luba <lukasz.luba@arm.com>
|
|
|
|
Cc: Mel Gorman <mgorman@suse.de>
|
|
|
|
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
|
|
|
|
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
|
|
|
|
Cc: Steven Rostedt <rostedt@goodmis.org>
|
|
|
|
Cc: Tim C. Chen <tim.c.chen@intel.com>
|
|
|
|
Cc: Valentin Schneider <vschneid@redhat.com>
|
|
|
|
Cc: x86@kernel.org
|
|
|
|
Cc: linux-pm@vger.kernel.org
|
|
|
|
Cc: linux-kernel@vger.kernel.org
|
|
|
|
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
|
|
|
|
Patchset: intel-thread-director
|
|
|
|
---
|
|
|
|
arch/x86/include/asm/cpufeatures.h | 1 +
|
|
|
|
arch/x86/include/asm/disabled-features.h | 8 +++++++-
|
|
|
|
arch/x86/kernel/cpu/cpuid-deps.c | 1 +
|
|
|
|
3 files changed, 9 insertions(+), 1 deletion(-)
|
|
|
|
|
|
|
|
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
|
2023-03-12 00:55:40 +00:00
|
|
|
index 92729c38853d1..02f10cd5c7536 100644
|
2023-03-06 23:14:39 +00:00
|
|
|
--- a/arch/x86/include/asm/cpufeatures.h
|
|
|
|
+++ b/arch/x86/include/asm/cpufeatures.h
|
|
|
|
@@ -342,6 +342,7 @@
|
|
|
|
#define X86_FEATURE_HWP_EPP (14*32+10) /* HWP Energy Perf. Preference */
|
|
|
|
#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */
|
|
|
|
#define X86_FEATURE_HFI (14*32+19) /* Hardware Feedback Interface */
|
|
|
|
+#define X86_FEATURE_ITD (14*32+23) /* Intel Thread Director */
|
|
|
|
|
|
|
|
/* AMD SVM Feature Identification, CPUID level 0x8000000a (EDX), word 15 */
|
|
|
|
#define X86_FEATURE_NPT (15*32+ 0) /* Nested Page Table support */
|
|
|
|
diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
|
2023-03-12 00:55:40 +00:00
|
|
|
index 33d2cd04d2544..7a668f6d0502d 100644
|
2023-03-06 23:14:39 +00:00
|
|
|
--- a/arch/x86/include/asm/disabled-features.h
|
|
|
|
+++ b/arch/x86/include/asm/disabled-features.h
|
|
|
|
@@ -87,6 +87,12 @@
|
|
|
|
# define DISABLE_TDX_GUEST (1 << (X86_FEATURE_TDX_GUEST & 31))
|
|
|
|
#endif
|
|
|
|
|
|
|
|
+#ifdef CONFIG_IPC_CLASSES
|
|
|
|
+# define DISABLE_ITD 0
|
|
|
|
+#else
|
|
|
|
+# define DISABLE_ITD (1 << (X86_FEATURE_ITD & 31))
|
|
|
|
+#endif
|
|
|
|
+
|
|
|
|
/*
|
|
|
|
* Make sure to add features to the correct mask
|
|
|
|
*/
|
|
|
|
@@ -104,7 +110,7 @@
|
|
|
|
#define DISABLED_MASK11 (DISABLE_RETPOLINE|DISABLE_RETHUNK|DISABLE_UNRET)
|
|
|
|
#define DISABLED_MASK12 0
|
|
|
|
#define DISABLED_MASK13 0
|
|
|
|
-#define DISABLED_MASK14 0
|
|
|
|
+#define DISABLED_MASK14 (DISABLE_ITD)
|
|
|
|
#define DISABLED_MASK15 0
|
|
|
|
#define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE|DISABLE_LA57|DISABLE_UMIP| \
|
|
|
|
DISABLE_ENQCMD)
|
|
|
|
diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c
|
2023-03-12 00:55:40 +00:00
|
|
|
index c881bcafba7d7..f6f8a3cd4f2ce 100644
|
2023-03-06 23:14:39 +00:00
|
|
|
--- a/arch/x86/kernel/cpu/cpuid-deps.c
|
|
|
|
+++ b/arch/x86/kernel/cpu/cpuid-deps.c
|
|
|
|
@@ -78,6 +78,7 @@ static const struct cpuid_dep cpuid_deps[] = {
|
|
|
|
{ X86_FEATURE_XFD, X86_FEATURE_XSAVES },
|
|
|
|
{ X86_FEATURE_XFD, X86_FEATURE_XGETBV1 },
|
|
|
|
{ X86_FEATURE_AMX_TILE, X86_FEATURE_XFD },
|
|
|
|
+ { X86_FEATURE_ITD, X86_FEATURE_HFI },
|
|
|
|
{}
|
|
|
|
};
|
|
|
|
|
|
|
|
--
|
|
|
|
2.39.2
|
|
|
|
|
2023-03-12 00:55:40 +00:00
|
|
|
From b4ce6b73c21395712049b4eeb71e55bf617be6f2 Mon Sep 17 00:00:00 2001
|
2023-03-06 23:14:39 +00:00
|
|
|
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
|
|
|
|
Date: Mon, 6 Feb 2023 21:10:54 -0800
|
|
|
|
Subject: [PATCH] thermal: intel: hfi: Store per-CPU IPCC scores
|
|
|
|
|
|
|
|
The scheduler reads the IPCC scores when balancing load. These reads can
|
|
|
|
be quite frequent. Hardware can also update the HFI table frequently.
|
|
|
|
Concurrent access may cause a lot of lock contention. It gets worse as the
|
|
|
|
number of CPUs increases.
|
|
|
|
|
|
|
|
Instead, create separate per-CPU IPCC scores that the scheduler can read
|
|
|
|
without the HFI table lock.
|
|
|
|
|
|
|
|
Cc: Ben Segall <bsegall@google.com>
|
|
|
|
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
|
|
|
|
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
|
|
|
|
Cc: Ionela Voinescu <ionela.voinescu@arm.com>
|
|
|
|
Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
|
|
|
|
Cc: Len Brown <len.brown@intel.com>
|
|
|
|
Cc: Lukasz Luba <lukasz.luba@arm.com>
|
|
|
|
Cc: Mel Gorman <mgorman@suse.de>
|
|
|
|
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
|
|
|
|
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
|
|
|
|
Cc: Steven Rostedt <rostedt@goodmis.org>
|
|
|
|
Cc: Tim C. Chen <tim.c.chen@intel.com>
|
|
|
|
Cc: Valentin Schneider <vschneid@redhat.com>
|
|
|
|
Cc: x86@kernel.org
|
|
|
|
Cc: linux-pm@vger.kernel.org
|
|
|
|
Cc: linux-kernel@vger.kernel.org
|
|
|
|
Suggested-by: Peter Zijlstra (Intel) <peterz@infradead.org>
|
|
|
|
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
|
|
|
|
Patchset: intel-thread-director
|
|
|
|
---
|
|
|
|
drivers/thermal/intel/intel_hfi.c | 46 +++++++++++++++++++++++++++++++
|
|
|
|
1 file changed, 46 insertions(+)
|
|
|
|
|
|
|
|
diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c
|
2023-03-12 00:55:40 +00:00
|
|
|
index ae7eec197f680..e84dcfbef0dd9 100644
|
2023-03-06 23:14:39 +00:00
|
|
|
--- a/drivers/thermal/intel/intel_hfi.c
|
|
|
|
+++ b/drivers/thermal/intel/intel_hfi.c
|
|
|
|
@@ -29,6 +29,7 @@
|
|
|
|
#include <linux/kernel.h>
|
|
|
|
#include <linux/math.h>
|
|
|
|
#include <linux/mutex.h>
|
|
|
|
+#include <linux/percpu.h>
|
|
|
|
#include <linux/percpu-defs.h>
|
|
|
|
#include <linux/printk.h>
|
|
|
|
#include <linux/processor.h>
|
|
|
|
@@ -172,6 +173,43 @@ static struct workqueue_struct *hfi_updates_wq;
|
|
|
|
#define HFI_UPDATE_INTERVAL HZ
|
|
|
|
#define HFI_MAX_THERM_NOTIFY_COUNT 16
|
|
|
|
|
|
|
|
+#ifdef CONFIG_IPC_CLASSES
|
|
|
|
+static int __percpu *hfi_ipcc_scores;
|
|
|
|
+
|
|
|
|
+static int alloc_hfi_ipcc_scores(void)
|
|
|
|
+{
|
|
|
|
+ if (!cpu_feature_enabled(X86_FEATURE_ITD))
|
|
|
|
+ return 0;
|
|
|
|
+
|
|
|
|
+ hfi_ipcc_scores = __alloc_percpu(sizeof(*hfi_ipcc_scores) *
|
|
|
|
+ hfi_features.nr_classes,
|
|
|
|
+ sizeof(*hfi_ipcc_scores));
|
|
|
|
+
|
|
|
|
+ return !hfi_ipcc_scores;
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+static void set_hfi_ipcc_score(void *caps, int cpu)
|
|
|
|
+{
|
|
|
|
+ int i, *hfi_class;
|
|
|
|
+
|
|
|
|
+ if (!cpu_feature_enabled(X86_FEATURE_ITD))
|
|
|
|
+ return;
|
|
|
|
+
|
|
|
|
+ hfi_class = per_cpu_ptr(hfi_ipcc_scores, cpu);
|
|
|
|
+
|
|
|
|
+ for (i = 0; i < hfi_features.nr_classes; i++) {
|
|
|
|
+ struct hfi_cpu_data *class_caps;
|
|
|
|
+
|
|
|
|
+ class_caps = caps + i * hfi_features.class_stride;
|
|
|
|
+ WRITE_ONCE(hfi_class[i], class_caps->perf_cap);
|
|
|
|
+ }
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+#else
|
|
|
|
+static int alloc_hfi_ipcc_scores(void) { return 0; }
|
|
|
|
+static void set_hfi_ipcc_score(void *caps, int cpu) { }
|
|
|
|
+#endif /* CONFIG_IPC_CLASSES */
|
|
|
|
+
|
|
|
|
static void get_hfi_caps(struct hfi_instance *hfi_instance,
|
|
|
|
struct thermal_genl_cpu_caps *cpu_caps)
|
|
|
|
{
|
|
|
|
@@ -194,6 +232,8 @@ static void get_hfi_caps(struct hfi_instance *hfi_instance,
|
|
|
|
cpu_caps[i].efficiency = caps->ee_cap << 2;
|
|
|
|
|
|
|
|
++i;
|
|
|
|
+
|
|
|
|
+ set_hfi_ipcc_score(caps, cpu);
|
|
|
|
}
|
|
|
|
raw_spin_unlock_irq(&hfi_instance->table_lock);
|
|
|
|
}
|
|
|
|
@@ -572,8 +612,14 @@ void __init intel_hfi_init(void)
|
|
|
|
if (!hfi_updates_wq)
|
|
|
|
goto err_nomem;
|
|
|
|
|
|
|
|
+ if (alloc_hfi_ipcc_scores())
|
|
|
|
+ goto err_ipcc;
|
|
|
|
+
|
|
|
|
return;
|
|
|
|
|
|
|
|
+err_ipcc:
|
|
|
|
+ destroy_workqueue(hfi_updates_wq);
|
|
|
|
+
|
|
|
|
err_nomem:
|
|
|
|
for (j = 0; j < i; ++j) {
|
|
|
|
hfi_instance = &hfi_instances[j];
|
|
|
|
--
|
|
|
|
2.39.2
|
|
|
|
|
2023-03-12 00:55:40 +00:00
|
|
|
From 8826c8ac9227c660003590b333f80a0d144cba2e Mon Sep 17 00:00:00 2001
|
2023-03-06 23:14:39 +00:00
|
|
|
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
|
|
|
|
Date: Mon, 6 Feb 2023 21:10:55 -0800
|
|
|
|
Subject: [PATCH] thermal: intel: hfi: Update the IPC class of the current task
|
|
|
|
|
|
|
|
Use Intel Thread Director classification to update the IPC class of a
|
|
|
|
task. Implement the arch_update_ipcc() interface of the scheduler.
|
|
|
|
|
|
|
|
Cc: Ben Segall <bsegall@google.com>
|
|
|
|
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
|
|
|
|
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
|
|
|
|
Cc: Ionela Voinescu <ionela.voinescu@arm.com>
|
|
|
|
Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
|
|
|
|
Cc: Len Brown <len.brown@intel.com>
|
|
|
|
Cc: Lukasz Luba <lukasz.luba@arm.com>
|
|
|
|
Cc: Mel Gorman <mgorman@suse.de>
|
|
|
|
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
|
|
|
|
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
|
|
|
|
Cc: Steven Rostedt <rostedt@goodmis.org>
|
|
|
|
Cc: Tim C. Chen <tim.c.chen@intel.com>
|
|
|
|
Cc: Valentin Schneider <vschneid@redhat.com>
|
|
|
|
Cc: x86@kernel.org
|
|
|
|
Cc: linux-pm@vger.kernel.org
|
|
|
|
Cc: linux-kernel@vger.kernel.org
|
|
|
|
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
|
|
|
|
Patchset: intel-thread-director
|
|
|
|
---
|
|
|
|
arch/x86/include/asm/topology.h | 6 ++++++
|
|
|
|
drivers/thermal/intel/intel_hfi.c | 32 +++++++++++++++++++++++++++++++
|
|
|
|
2 files changed, 38 insertions(+)
|
|
|
|
|
|
|
|
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
|
2023-03-12 00:55:40 +00:00
|
|
|
index 458c891a82736..ffcdac3f398f0 100644
|
2023-03-06 23:14:39 +00:00
|
|
|
--- a/arch/x86/include/asm/topology.h
|
|
|
|
+++ b/arch/x86/include/asm/topology.h
|
|
|
|
@@ -227,4 +227,10 @@ void init_freq_invariance_cppc(void);
|
|
|
|
#define arch_init_invariance_cppc init_freq_invariance_cppc
|
|
|
|
#endif
|
|
|
|
|
|
|
|
+#if defined(CONFIG_IPC_CLASSES) && defined(CONFIG_INTEL_HFI_THERMAL)
|
|
|
|
+void intel_hfi_update_ipcc(struct task_struct *curr);
|
|
|
|
+
|
|
|
|
+#define arch_update_ipcc intel_hfi_update_ipcc
|
|
|
|
+#endif /* defined(CONFIG_IPC_CLASSES) && defined(CONFIG_INTEL_HFI_THERMAL) */
|
|
|
|
+
|
|
|
|
#endif /* _ASM_X86_TOPOLOGY_H */
|
|
|
|
diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c
|
2023-03-12 00:55:40 +00:00
|
|
|
index e84dcfbef0dd9..f2de597b3118a 100644
|
2023-03-06 23:14:39 +00:00
|
|
|
--- a/drivers/thermal/intel/intel_hfi.c
|
|
|
|
+++ b/drivers/thermal/intel/intel_hfi.c
|
|
|
|
@@ -74,6 +74,17 @@ union cpuid6_edx {
|
|
|
|
u32 full;
|
|
|
|
};
|
|
|
|
|
|
|
|
+#ifdef CONFIG_IPC_CLASSES
|
|
|
|
+union hfi_thread_feedback_char_msr {
|
|
|
|
+ struct {
|
|
|
|
+ u64 classid : 8;
|
|
|
|
+ u64 __reserved : 55;
|
|
|
|
+ u64 valid : 1;
|
|
|
|
+ } split;
|
|
|
|
+ u64 full;
|
|
|
|
+};
|
|
|
|
+#endif
|
|
|
|
+
|
|
|
|
/**
|
|
|
|
* struct hfi_cpu_data - HFI capabilities per CPU
|
|
|
|
* @perf_cap: Performance capability
|
|
|
|
@@ -176,6 +187,27 @@ static struct workqueue_struct *hfi_updates_wq;
|
|
|
|
#ifdef CONFIG_IPC_CLASSES
|
|
|
|
static int __percpu *hfi_ipcc_scores;
|
|
|
|
|
|
|
|
+void intel_hfi_update_ipcc(struct task_struct *curr)
|
|
|
|
+{
|
|
|
|
+ union hfi_thread_feedback_char_msr msr;
|
|
|
|
+
|
|
|
|
+ /* We should not be here if ITD is not supported. */
|
|
|
|
+ if (!cpu_feature_enabled(X86_FEATURE_ITD)) {
|
|
|
|
+ pr_warn_once("task classification requested but not supported!");
|
|
|
|
+ return;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ rdmsrl(MSR_IA32_HW_FEEDBACK_CHAR, msr.full);
|
|
|
|
+ if (!msr.split.valid)
|
|
|
|
+ return;
|
|
|
|
+
|
|
|
|
+ /*
|
|
|
|
+ * 0 is a valid classification for Intel Thread Director. A scheduler
|
|
|
|
+ * IPCC class of 0 means that the task is unclassified. Adjust.
|
|
|
|
+ */
|
|
|
|
+ curr->ipcc = msr.split.classid + 1;
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
static int alloc_hfi_ipcc_scores(void)
|
|
|
|
{
|
|
|
|
if (!cpu_feature_enabled(X86_FEATURE_ITD))
|
|
|
|
--
|
|
|
|
2.39.2
|
|
|
|
|
2023-03-12 00:55:40 +00:00
|
|
|
From e6c5f3a62bb1fc563887cca4df785b49f77ef0af Mon Sep 17 00:00:00 2001
|
2023-03-06 23:14:39 +00:00
|
|
|
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
|
|
|
|
Date: Mon, 6 Feb 2023 21:10:56 -0800
|
|
|
|
Subject: [PATCH] thermal: intel: hfi: Report the IPC class score of a CPU
|
|
|
|
|
|
|
|
Implement the arch_get_ipcc_score() interface of the scheduler. Use the
|
|
|
|
performance capabilities of the extended Hardware Feedback Interface table
|
|
|
|
as the IPC score.
|
|
|
|
|
|
|
|
Cc: Ben Segall <bsegall@google.com>
|
|
|
|
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
|
|
|
|
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
|
|
|
|
Cc: Ionela Voinescu <ionela.voinescu@arm.com>
|
|
|
|
Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
|
|
|
|
Cc: Len Brown <len.brown@intel.com>
|
|
|
|
Cc: Lukasz Luba <lukasz.luba@arm.com>
|
|
|
|
Cc: Mel Gorman <mgorman@suse.de>
|
|
|
|
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
|
|
|
|
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
|
|
|
|
Cc: Steven Rostedt <rostedt@goodmis.org>
|
|
|
|
Cc: Tim C. Chen <tim.c.chen@intel.com>
|
|
|
|
Cc: Valentin Schneider <vschneid@redhat.com>
|
|
|
|
Cc: x86@kernel.org
|
|
|
|
Cc: linux-pm@vger.kernel.org
|
|
|
|
Cc: linux-kernel@vger.kernel.org
|
|
|
|
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
|
|
|
|
Patchset: intel-thread-director
|
|
|
|
---
|
|
|
|
arch/x86/include/asm/topology.h | 2 ++
|
|
|
|
drivers/thermal/intel/intel_hfi.c | 27 +++++++++++++++++++++++++++
|
|
|
|
2 files changed, 29 insertions(+)
|
|
|
|
|
|
|
|
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
|
2023-03-12 00:55:40 +00:00
|
|
|
index ffcdac3f398f0..c4fcd9c3c634f 100644
|
2023-03-06 23:14:39 +00:00
|
|
|
--- a/arch/x86/include/asm/topology.h
|
|
|
|
+++ b/arch/x86/include/asm/topology.h
|
|
|
|
@@ -229,8 +229,10 @@ void init_freq_invariance_cppc(void);
|
|
|
|
|
|
|
|
#if defined(CONFIG_IPC_CLASSES) && defined(CONFIG_INTEL_HFI_THERMAL)
|
|
|
|
void intel_hfi_update_ipcc(struct task_struct *curr);
|
|
|
|
+unsigned long intel_hfi_get_ipcc_score(unsigned short ipcc, int cpu);
|
|
|
|
|
|
|
|
#define arch_update_ipcc intel_hfi_update_ipcc
|
|
|
|
+#define arch_get_ipcc_score intel_hfi_get_ipcc_score
|
|
|
|
#endif /* defined(CONFIG_IPC_CLASSES) && defined(CONFIG_INTEL_HFI_THERMAL) */
|
|
|
|
|
|
|
|
#endif /* _ASM_X86_TOPOLOGY_H */
|
|
|
|
diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c
|
2023-03-12 00:55:40 +00:00
|
|
|
index f2de597b3118a..e8ad2be47e82f 100644
|
2023-03-06 23:14:39 +00:00
|
|
|
--- a/drivers/thermal/intel/intel_hfi.c
|
|
|
|
+++ b/drivers/thermal/intel/intel_hfi.c
|
|
|
|
@@ -208,6 +208,33 @@ void intel_hfi_update_ipcc(struct task_struct *curr)
|
|
|
|
curr->ipcc = msr.split.classid + 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
+unsigned long intel_hfi_get_ipcc_score(unsigned short ipcc, int cpu)
|
|
|
|
+{
|
|
|
|
+ unsigned short hfi_class;
|
|
|
|
+ int *scores;
|
|
|
|
+
|
|
|
|
+ if (cpu < 0 || cpu >= nr_cpu_ids)
|
|
|
|
+ return -EINVAL;
|
|
|
|
+
|
|
|
|
+ if (ipcc == IPC_CLASS_UNCLASSIFIED)
|
|
|
|
+ return -EINVAL;
|
|
|
|
+
|
|
|
|
+ /*
|
|
|
|
+ * Scheduler IPC classes start at 1. HFI classes start at 0.
|
|
|
|
+ * See note intel_hfi_update_ipcc().
|
|
|
|
+ */
|
|
|
|
+ hfi_class = ipcc - 1;
|
|
|
|
+
|
|
|
|
+ if (hfi_class >= hfi_features.nr_classes)
|
|
|
|
+ return -EINVAL;
|
|
|
|
+
|
|
|
|
+ scores = per_cpu_ptr(hfi_ipcc_scores, cpu);
|
|
|
|
+ if (!scores)
|
|
|
|
+ return -ENODEV;
|
|
|
|
+
|
|
|
|
+ return READ_ONCE(scores[hfi_class]);
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
static int alloc_hfi_ipcc_scores(void)
|
|
|
|
{
|
|
|
|
if (!cpu_feature_enabled(X86_FEATURE_ITD))
|
|
|
|
--
|
|
|
|
2.39.2
|
|
|
|
|
2023-03-12 00:55:40 +00:00
|
|
|
From 502fea0f97180e7e5a9fb1a88d31659d30813348 Mon Sep 17 00:00:00 2001
|
2023-03-06 23:14:39 +00:00
|
|
|
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
|
|
|
|
Date: Mon, 6 Feb 2023 21:10:57 -0800
|
|
|
|
Subject: [PATCH] thermal: intel: hfi: Define a default class for unclassified
|
|
|
|
tasks
|
|
|
|
|
|
|
|
A task may be unclassified if it has been recently created, spend most of
|
|
|
|
its lifetime sleeping, or hardware has not provided a classification.
|
|
|
|
|
|
|
|
Most tasks will be eventually classified as scheduler's IPC class 1
|
|
|
|
(HFI class 0). This class corresponds to the capabilities in the legacy,
|
|
|
|
classless, HFI table.
|
|
|
|
|
|
|
|
IPC class 1 is a reasonable choice until hardware provides an actual
|
|
|
|
classification. Meanwhile, the scheduler will place classes of tasks with
|
|
|
|
higher IPC scores on higher-performance CPUs.
|
|
|
|
|
|
|
|
Cc: Ben Segall <bsegall@google.com>
|
|
|
|
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
|
|
|
|
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
|
|
|
|
Cc: Ionela Voinescu <ionela.voinescu@arm.com>
|
|
|
|
Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
|
|
|
|
Cc: Len Brown <len.brown@intel.com>
|
|
|
|
Cc: Lukasz Luba <lukasz.luba@arm.com>
|
|
|
|
Cc: Mel Gorman <mgorman@suse.de>
|
|
|
|
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
|
|
|
|
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
|
|
|
|
Cc: Steven Rostedt <rostedt@goodmis.org>
|
|
|
|
Cc: Tim C. Chen <tim.c.chen@intel.com>
|
|
|
|
Cc: Valentin Schneider <vschneid@redhat.com>
|
|
|
|
Cc: x86@kernel.org
|
|
|
|
Cc: linux-pm@vger.kernel.org
|
|
|
|
Cc: linux-kernel@vger.kernel.org
|
|
|
|
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
|
|
|
|
Patchset: intel-thread-director
|
|
|
|
---
|
|
|
|
drivers/thermal/intel/intel_hfi.c | 15 ++++++++++++++-
|
|
|
|
1 file changed, 14 insertions(+), 1 deletion(-)
|
|
|
|
|
|
|
|
diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c
|
2023-03-12 00:55:40 +00:00
|
|
|
index e8ad2be47e82f..0f6935705e559 100644
|
2023-03-06 23:14:39 +00:00
|
|
|
--- a/drivers/thermal/intel/intel_hfi.c
|
|
|
|
+++ b/drivers/thermal/intel/intel_hfi.c
|
|
|
|
@@ -187,6 +187,19 @@ static struct workqueue_struct *hfi_updates_wq;
|
|
|
|
#ifdef CONFIG_IPC_CLASSES
|
|
|
|
static int __percpu *hfi_ipcc_scores;
|
|
|
|
|
|
|
|
+/*
|
|
|
|
+ * A task may be unclassified if it has been recently created, spend most of
|
|
|
|
+ * its lifetime sleeping, or hardware has not provided a classification.
|
|
|
|
+ *
|
|
|
|
+ * Most tasks will be classified as scheduler's IPC class 1 (HFI class 0)
|
|
|
|
+ * eventually. Meanwhile, the scheduler will place classes of tasks with higher
|
|
|
|
+ * IPC scores on higher-performance CPUs.
|
|
|
|
+ *
|
|
|
|
+ * IPC class 1 is a reasonable choice. It matches the performance capability
|
|
|
|
+ * of the legacy, classless, HFI table.
|
|
|
|
+ */
|
|
|
|
+#define HFI_UNCLASSIFIED_DEFAULT 1
|
|
|
|
+
|
|
|
|
void intel_hfi_update_ipcc(struct task_struct *curr)
|
|
|
|
{
|
|
|
|
union hfi_thread_feedback_char_msr msr;
|
|
|
|
@@ -217,7 +230,7 @@ unsigned long intel_hfi_get_ipcc_score(unsigned short ipcc, int cpu)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
if (ipcc == IPC_CLASS_UNCLASSIFIED)
|
|
|
|
- return -EINVAL;
|
|
|
|
+ ipcc = HFI_UNCLASSIFIED_DEFAULT;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Scheduler IPC classes start at 1. HFI classes start at 0.
|
|
|
|
--
|
|
|
|
2.39.2
|
|
|
|
|
2023-03-12 00:55:40 +00:00
|
|
|
From 5c8a8a9dc0da32f5cb1321e5cd86e68c7221af74 Mon Sep 17 00:00:00 2001
|
2023-03-06 23:14:39 +00:00
|
|
|
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
|
|
|
|
Date: Mon, 6 Feb 2023 21:10:58 -0800
|
|
|
|
Subject: [PATCH] thermal: intel: hfi: Enable the Intel Thread Director
|
|
|
|
|
|
|
|
Enable Intel Thread Director from the CPU hotplug callback: globally from
|
|
|
|
CPU0 and then enable the thread-classification hardware in each logical
|
|
|
|
processor individually.
|
|
|
|
|
|
|
|
Also, initialize the number of classes supported.
|
|
|
|
|
|
|
|
Let the scheduler know that it can start using IPC classes.
|
|
|
|
|
|
|
|
Cc: Ben Segall <bsegall@google.com>
|
|
|
|
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
|
|
|
|
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
|
|
|
|
Cc: Ionela Voinescu <ionela.voinescu@arm.com>
|
|
|
|
Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
|
|
|
|
Cc: Len Brown <len.brown@intel.com>
|
|
|
|
Cc: Lukasz Luba <lukasz.luba@arm.com>
|
|
|
|
Cc: Mel Gorman <mgorman@suse.de>
|
|
|
|
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
|
|
|
|
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
|
|
|
|
Cc: Steven Rostedt <rostedt@goodmis.org>
|
|
|
|
Cc: Tim C. Chen <tim.c.chen@intel.com>
|
|
|
|
Cc: Valentin Schneider <vschneid@redhat.com>
|
|
|
|
Cc: x86@kernel.org
|
|
|
|
Cc: linux-pm@vger.kernel.org
|
|
|
|
Cc: linux-kernel@vger.kernel.org
|
|
|
|
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
|
|
|
|
Patchset: intel-thread-director
|
|
|
|
---
|
|
|
|
arch/x86/include/asm/msr-index.h | 2 ++
|
|
|
|
drivers/thermal/intel/intel_hfi.c | 40 +++++++++++++++++++++++++++++--
|
|
|
|
2 files changed, 40 insertions(+), 2 deletions(-)
|
|
|
|
|
|
|
|
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
|
2023-03-12 00:55:40 +00:00
|
|
|
index 117e4e977b55d..b4a5de303b88a 100644
|
2023-03-06 23:14:39 +00:00
|
|
|
--- a/arch/x86/include/asm/msr-index.h
|
|
|
|
+++ b/arch/x86/include/asm/msr-index.h
|
2023-03-11 22:54:06 +00:00
|
|
|
@@ -1089,6 +1089,8 @@
|
2023-03-06 23:14:39 +00:00
|
|
|
/* Hardware Feedback Interface */
|
|
|
|
#define MSR_IA32_HW_FEEDBACK_PTR 0x17d0
|
|
|
|
#define MSR_IA32_HW_FEEDBACK_CONFIG 0x17d1
|
|
|
|
+#define MSR_IA32_HW_FEEDBACK_THREAD_CONFIG 0x17d4
|
|
|
|
+#define MSR_IA32_HW_FEEDBACK_CHAR 0x17d2
|
|
|
|
|
|
|
|
/* x2APIC locked status */
|
|
|
|
#define MSR_IA32_XAPIC_DISABLE_STATUS 0xBD
|
|
|
|
diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c
|
2023-03-12 00:55:40 +00:00
|
|
|
index 0f6935705e559..21a0d246ca501 100644
|
2023-03-06 23:14:39 +00:00
|
|
|
--- a/drivers/thermal/intel/intel_hfi.c
|
|
|
|
+++ b/drivers/thermal/intel/intel_hfi.c
|
|
|
|
@@ -50,6 +50,8 @@
|
|
|
|
/* Hardware Feedback Interface MSR configuration bits */
|
|
|
|
#define HW_FEEDBACK_PTR_VALID_BIT BIT(0)
|
|
|
|
#define HW_FEEDBACK_CONFIG_HFI_ENABLE_BIT BIT(0)
|
|
|
|
+#define HW_FEEDBACK_CONFIG_ITD_ENABLE_BIT BIT(1)
|
|
|
|
+#define HW_FEEDBACK_THREAD_CONFIG_ENABLE_BIT BIT(0)
|
|
|
|
|
|
|
|
/* CPUID detection and enumeration definitions for HFI */
|
|
|
|
|
|
|
|
@@ -74,6 +76,15 @@ union cpuid6_edx {
|
|
|
|
u32 full;
|
|
|
|
};
|
|
|
|
|
|
|
|
+union cpuid6_ecx {
|
|
|
|
+ struct {
|
|
|
|
+ u32 dont_care0:8;
|
|
|
|
+ u32 nr_classes:8;
|
|
|
|
+ u32 dont_care1:16;
|
|
|
|
+ } split;
|
|
|
|
+ u32 full;
|
|
|
|
+};
|
|
|
|
+
|
|
|
|
#ifdef CONFIG_IPC_CLASSES
|
|
|
|
union hfi_thread_feedback_char_msr {
|
|
|
|
struct {
|
|
|
|
@@ -498,6 +509,11 @@ void intel_hfi_online(unsigned int cpu)
|
|
|
|
|
|
|
|
init_hfi_cpu_index(info);
|
|
|
|
|
|
|
|
+ if (cpu_feature_enabled(X86_FEATURE_ITD)) {
|
|
|
|
+ msr_val = HW_FEEDBACK_THREAD_CONFIG_ENABLE_BIT;
|
|
|
|
+ wrmsrl(MSR_IA32_HW_FEEDBACK_THREAD_CONFIG, msr_val);
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
/*
|
|
|
|
* Now check if the HFI instance of the package/die of @cpu has been
|
|
|
|
* initialized (by checking its header). In such case, all we have to
|
|
|
|
@@ -553,8 +569,22 @@ void intel_hfi_online(unsigned int cpu)
|
|
|
|
*/
|
|
|
|
rdmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val);
|
|
|
|
msr_val |= HW_FEEDBACK_CONFIG_HFI_ENABLE_BIT;
|
|
|
|
+
|
|
|
|
+ if (cpu_feature_enabled(X86_FEATURE_ITD))
|
|
|
|
+ msr_val |= HW_FEEDBACK_CONFIG_ITD_ENABLE_BIT;
|
|
|
|
+
|
|
|
|
wrmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val);
|
|
|
|
|
|
|
|
+ /*
|
|
|
|
+ * We have all we need to support IPC classes. Task classification is
|
|
|
|
+ * now working.
|
|
|
|
+ *
|
|
|
|
+ * All class scores are zero until after the first HFI update. That is
|
|
|
|
+ * OK. The scheduler queries these scores at every load balance.
|
|
|
|
+ */
|
|
|
|
+ if (cpu_feature_enabled(X86_FEATURE_ITD))
|
|
|
|
+ sched_enable_ipc_classes();
|
|
|
|
+
|
|
|
|
unlock:
|
|
|
|
mutex_unlock(&hfi_instance_lock);
|
|
|
|
return;
|
|
|
|
@@ -632,8 +662,14 @@ static __init int hfi_parse_features(void)
|
|
|
|
*/
|
|
|
|
hfi_features.class_stride = nr_capabilities;
|
|
|
|
|
|
|
|
- /* For now, use only one class of the HFI table */
|
|
|
|
- hfi_features.nr_classes = 1;
|
|
|
|
+ if (cpu_feature_enabled(X86_FEATURE_ITD)) {
|
|
|
|
+ union cpuid6_ecx ecx;
|
|
|
|
+
|
|
|
|
+ ecx.full = cpuid_ecx(CPUID_HFI_LEAF);
|
|
|
|
+ hfi_features.nr_classes = ecx.split.nr_classes;
|
|
|
|
+ } else {
|
|
|
|
+ hfi_features.nr_classes = 1;
|
|
|
|
+ }
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The header contains change indications for each supported feature.
|
|
|
|
--
|
|
|
|
2.39.2
|
|
|
|
|
2023-03-12 00:55:40 +00:00
|
|
|
From 721e6a04633384f5f0a837ee1347e99ba9f2357d Mon Sep 17 00:00:00 2001
|
2023-03-06 23:14:39 +00:00
|
|
|
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
|
|
|
|
Date: Mon, 6 Feb 2023 21:10:59 -0800
|
|
|
|
Subject: [PATCH] sched/task_struct: Add helpers for IPC classification
|
|
|
|
|
|
|
|
The unprocessed classification that hardware provides for a task may not
|
|
|
|
be usable by the scheduler: the classification may change too frequently or
|
|
|
|
architectures may want to consider extra factors. For instance, some
|
|
|
|
processors with Intel Thread Director need to consider the state of the SMT
|
|
|
|
siblings of a core.
|
|
|
|
|
|
|
|
Provide per-task helper variables that architectures can use to post-
|
|
|
|
process the classification that hardware provides.
|
|
|
|
|
|
|
|
Cc: Ben Segall <bsegall@google.com>
|
|
|
|
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
|
|
|
|
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
|
|
|
|
Cc: Ionela Voinescu <ionela.voinescu@arm.com>
|
|
|
|
Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
|
|
|
|
Cc: Len Brown <len.brown@intel.com>
|
|
|
|
Cc: Lukasz Luba <lukasz.luba@arm.com>
|
|
|
|
Cc: Mel Gorman <mgorman@suse.de>
|
|
|
|
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
|
|
|
|
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
|
|
|
|
Cc: Steven Rostedt <rostedt@goodmis.org>
|
|
|
|
Cc: Tim C. Chen <tim.c.chen@intel.com>
|
|
|
|
Cc: Valentin Schneider <vschneid@redhat.com>
|
|
|
|
Cc: x86@kernel.org
|
|
|
|
Cc: linux-pm@vger.kernel.org
|
|
|
|
Cc: linux-kernel@vger.kernel.org
|
|
|
|
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
|
|
|
|
Patchset: intel-thread-director
|
|
|
|
---
|
|
|
|
include/linux/sched.h | 12 +++++++++++-
|
|
|
|
1 file changed, 11 insertions(+), 1 deletion(-)
|
|
|
|
|
|
|
|
diff --git a/include/linux/sched.h b/include/linux/sched.h
|
2023-03-12 00:55:40 +00:00
|
|
|
index e58dc7503864c..63c2f88f0168a 100644
|
2023-03-06 23:14:39 +00:00
|
|
|
--- a/include/linux/sched.h
|
|
|
|
+++ b/include/linux/sched.h
|
|
|
|
@@ -1535,7 +1535,17 @@ struct task_struct {
|
|
|
|
* A hardware-defined classification of task that reflects but is
|
|
|
|
* not identical to the number of instructions per cycle.
|
|
|
|
*/
|
|
|
|
- unsigned short ipcc;
|
|
|
|
+ unsigned int ipcc : 9;
|
|
|
|
+ /*
|
|
|
|
+ * A candidate classification that arch-specific implementations
|
|
|
|
+ * qualify for correctness.
|
|
|
|
+ */
|
|
|
|
+ unsigned int ipcc_tmp : 9;
|
|
|
|
+ /*
|
|
|
|
+ * Counter to filter out transient candidate classifications
|
|
|
|
+ * of a task.
|
|
|
|
+ */
|
|
|
|
+ unsigned int ipcc_cntr : 14;
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/*
|
|
|
|
--
|
|
|
|
2.39.2
|
|
|
|
|
2023-03-12 00:55:40 +00:00
|
|
|
From afb386225a0c507949d96ea5122003edce813bb0 Mon Sep 17 00:00:00 2001
|
2023-03-06 23:14:39 +00:00
|
|
|
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
|
|
|
|
Date: Mon, 6 Feb 2023 21:11:00 -0800
|
|
|
|
Subject: [PATCH] sched/core: Initialize helpers of task classification
|
|
|
|
|
|
|
|
Just as tasks start life unclassified, initialize the classification
|
|
|
|
auxiliar variables.
|
|
|
|
|
|
|
|
Cc: Ben Segall <bsegall@google.com>
|
|
|
|
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
|
|
|
|
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
|
|
|
|
Cc: Ionela Voinescu <ionela.voinescu@arm.com>
|
|
|
|
Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
|
|
|
|
Cc: Len Brown <len.brown@intel.com>
|
|
|
|
Cc: Lukasz Luba <lukasz.luba@arm.com>
|
|
|
|
Cc: Mel Gorman <mgorman@suse.de>
|
|
|
|
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
|
|
|
|
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
|
|
|
|
Cc: Steven Rostedt <rostedt@goodmis.org>
|
|
|
|
Cc: Tim C. Chen <tim.c.chen@intel.com>
|
|
|
|
Cc: Valentin Schneider <vschneid@redhat.com>
|
|
|
|
Cc: x86@kernel.org
|
|
|
|
Cc: linux-pm@vger.kernel.org
|
|
|
|
Cc: linux-kernel@vger.kernel.org
|
|
|
|
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
|
|
|
|
Patchset: intel-thread-director
|
|
|
|
---
|
|
|
|
kernel/sched/core.c | 2 ++
|
|
|
|
1 file changed, 2 insertions(+)
|
|
|
|
|
|
|
|
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
|
2023-03-12 00:55:40 +00:00
|
|
|
index 4782b1359eb89..d9a026845d51c 100644
|
2023-03-06 23:14:39 +00:00
|
|
|
--- a/kernel/sched/core.c
|
|
|
|
+++ b/kernel/sched/core.c
|
|
|
|
@@ -4379,6 +4379,8 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
|
|
|
|
p->se.vruntime = 0;
|
|
|
|
#ifdef CONFIG_IPC_CLASSES
|
|
|
|
p->ipcc = IPC_CLASS_UNCLASSIFIED;
|
|
|
|
+ p->ipcc_tmp = IPC_CLASS_UNCLASSIFIED;
|
|
|
|
+ p->ipcc_cntr = 0;
|
|
|
|
#endif
|
|
|
|
INIT_LIST_HEAD(&p->se.group_node);
|
|
|
|
|
|
|
|
--
|
|
|
|
2.39.2
|
|
|
|
|
2023-03-12 00:55:40 +00:00
|
|
|
From 20d6343c567733d5a2704e1f4bb437acd115683e Mon Sep 17 00:00:00 2001
|
2023-03-06 23:14:39 +00:00
|
|
|
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
|
|
|
|
Date: Mon, 6 Feb 2023 21:11:01 -0800
|
|
|
|
Subject: [PATCH] sched/fair: Introduce sched_smt_siblings_idle()
|
|
|
|
|
|
|
|
X86 needs to know the idle state of the SMT siblings of a CPU to improve
|
|
|
|
the accuracy of IPCC classification. X86 implements support for IPC classes
|
|
|
|
in the thermal HFI driver.
|
|
|
|
|
|
|
|
Rename is_core_idle() as sched_smt_siblings_idle() and make it available
|
|
|
|
outside the scheduler code.
|
|
|
|
|
|
|
|
Cc: Ben Segall <bsegall@google.com>
|
|
|
|
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
|
|
|
|
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
|
|
|
|
Cc: Len Brown <len.brown@intel.com>
|
|
|
|
Cc: Mel Gorman <mgorman@suse.de>
|
|
|
|
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
|
|
|
|
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
|
|
|
|
Cc: Steven Rostedt <rostedt@goodmis.org>
|
|
|
|
Cc: Tim C. Chen <tim.c.chen@intel.com>
|
|
|
|
Cc: Valentin Schneider <vschneid@redhat.com>
|
|
|
|
Cc: x86@kernel.org
|
|
|
|
Cc: linux-kernel@vger.kernel.org
|
|
|
|
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
|
|
|
|
Patchset: intel-thread-director
|
|
|
|
---
|
|
|
|
include/linux/sched.h | 2 ++
|
|
|
|
kernel/sched/fair.c | 21 +++++++++++++++------
|
|
|
|
2 files changed, 17 insertions(+), 6 deletions(-)
|
|
|
|
|
|
|
|
diff --git a/include/linux/sched.h b/include/linux/sched.h
|
2023-03-12 00:55:40 +00:00
|
|
|
index 63c2f88f0168a..b049584f4c1a5 100644
|
2023-03-06 23:14:39 +00:00
|
|
|
--- a/include/linux/sched.h
|
|
|
|
+++ b/include/linux/sched.h
|
|
|
|
@@ -2446,4 +2446,6 @@ static inline void sched_core_fork(struct task_struct *p) { }
|
|
|
|
|
|
|
|
extern void sched_set_stop_task(int cpu, struct task_struct *stop);
|
|
|
|
|
|
|
|
+extern bool sched_smt_siblings_idle(int cpu);
|
|
|
|
+
|
|
|
|
#endif
|
|
|
|
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
|
2023-03-12 00:55:40 +00:00
|
|
|
index fc42b58f1ba42..ff1fd953258bd 100644
|
2023-03-06 23:14:39 +00:00
|
|
|
--- a/kernel/sched/fair.c
|
|
|
|
+++ b/kernel/sched/fair.c
|
|
|
|
@@ -1049,7 +1049,14 @@ update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
|
|
|
* Scheduling class queueing methods:
|
|
|
|
*/
|
|
|
|
|
|
|
|
-static inline bool is_core_idle(int cpu)
|
|
|
|
+/**
|
|
|
|
+ * sched_smt_siblings_idle - Check whether SMT siblings of a CPU are idle
|
|
|
|
+ * @cpu: The CPU to check
|
|
|
|
+ *
|
|
|
|
+ * Returns true if all the SMT siblings of @cpu are idle or @cpu does not have
|
|
|
|
+ * SMT siblings. The idle state of @cpu is not considered.
|
|
|
|
+ */
|
|
|
|
+bool sched_smt_siblings_idle(int cpu)
|
|
|
|
{
|
|
|
|
#ifdef CONFIG_SCHED_SMT
|
|
|
|
int sibling;
|
|
|
|
@@ -1755,7 +1762,7 @@ static inline int numa_idle_core(int idle_core, int cpu)
|
|
|
|
* Prefer cores instead of packing HT siblings
|
|
|
|
* and triggering future load balancing.
|
|
|
|
*/
|
|
|
|
- if (is_core_idle(cpu))
|
|
|
|
+ if (sched_smt_siblings_idle(cpu))
|
|
|
|
idle_core = cpu;
|
|
|
|
|
|
|
|
return idle_core;
|
|
|
|
@@ -9306,7 +9313,8 @@ sched_asym(struct lb_env *env, struct sd_lb_stats *sds, struct sg_lb_stats *sgs
|
|
|
|
* If the destination CPU has SMT siblings, env->idle != CPU_NOT_IDLE
|
|
|
|
* is not sufficient. We need to make sure the whole core is idle.
|
|
|
|
*/
|
|
|
|
- if (sds->local->flags & SD_SHARE_CPUCAPACITY && !is_core_idle(env->dst_cpu))
|
|
|
|
+ if (sds->local->flags & SD_SHARE_CPUCAPACITY &&
|
|
|
|
+ !sched_smt_siblings_idle(env->dst_cpu))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
/* Only do SMT checks if either local or candidate have SMT siblings. */
|
|
|
|
@@ -10475,7 +10483,8 @@ static struct rq *find_busiest_queue(struct lb_env *env,
|
|
|
|
sched_asym_prefer(i, env->dst_cpu) &&
|
|
|
|
nr_running == 1) {
|
|
|
|
if (env->sd->flags & SD_SHARE_CPUCAPACITY ||
|
|
|
|
- (!(env->sd->flags & SD_SHARE_CPUCAPACITY) && is_core_idle(i)))
|
|
|
|
+ (!(env->sd->flags & SD_SHARE_CPUCAPACITY) &&
|
|
|
|
+ sched_smt_siblings_idle(i)))
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
@@ -10604,7 +10613,7 @@ asym_active_balance(struct lb_env *env)
|
|
|
|
* busy sibling.
|
|
|
|
*/
|
|
|
|
return sched_asym_prefer(env->dst_cpu, env->src_cpu) ||
|
|
|
|
- !is_core_idle(env->src_cpu);
|
|
|
|
+ !sched_smt_siblings_idle(env->src_cpu);
|
|
|
|
}
|
|
|
|
|
|
|
|
return false;
|
|
|
|
@@ -11351,7 +11360,7 @@ static void nohz_balancer_kick(struct rq *rq)
|
|
|
|
*/
|
|
|
|
if (sd->flags & SD_SHARE_CPUCAPACITY ||
|
|
|
|
(!(sd->flags & SD_SHARE_CPUCAPACITY) &&
|
|
|
|
- is_core_idle(i))) {
|
|
|
|
+ sched_smt_siblings_idle(i))) {
|
|
|
|
flags = NOHZ_STATS_KICK | NOHZ_BALANCE_KICK;
|
|
|
|
goto unlock;
|
|
|
|
}
|
|
|
|
--
|
|
|
|
2.39.2
|
|
|
|
|
2023-03-12 00:55:40 +00:00
|
|
|
From 0a346e6136d9ca6c52ef6061e5d24b231629cd67 Mon Sep 17 00:00:00 2001
|
2023-03-06 23:14:39 +00:00
|
|
|
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
|
|
|
|
Date: Mon, 6 Feb 2023 21:11:02 -0800
|
|
|
|
Subject: [PATCH] thermal: intel: hfi: Implement model-specific checks for task
|
|
|
|
classification
|
|
|
|
|
|
|
|
In Alder Lake and Raptor Lake, the result of thread classification is more
|
|
|
|
accurate when only one SMT sibling is busy. Classification results for
|
|
|
|
class 2 and 3 are always reliable.
|
|
|
|
|
|
|
|
To avoid unnecessary migrations, only update the class of a task if it has
|
|
|
|
been the same during 4 consecutive user ticks.
|
|
|
|
|
|
|
|
Cc: Ben Segall <bsegall@google.com>
|
|
|
|
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
|
|
|
|
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
|
|
|
|
Cc: Ionela Voinescu <ionela.voinescu@arm.com>
|
|
|
|
Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
|
|
|
|
Cc: Len Brown <len.brown@intel.com>
|
|
|
|
Cc: Lukasz Luba <lukasz.luba@arm.com>
|
|
|
|
Cc: Mel Gorman <mgorman@suse.de>
|
|
|
|
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
|
|
|
|
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
|
|
|
|
Cc: Steven Rostedt <rostedt@goodmis.org>
|
|
|
|
Cc: Tim C. Chen <tim.c.chen@intel.com>
|
|
|
|
Cc: Valentin Schneider <vschneid@redhat.com>
|
|
|
|
Cc: x86@kernel.org
|
|
|
|
Cc: linux-pm@vger.kernel.org
|
|
|
|
Cc: linux-kernel@vger.kernel.org
|
|
|
|
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
|
|
|
|
Patchset: intel-thread-director
|
|
|
|
---
|
|
|
|
drivers/thermal/intel/intel_hfi.c | 60 ++++++++++++++++++++++++++++++-
|
|
|
|
1 file changed, 59 insertions(+), 1 deletion(-)
|
|
|
|
|
|
|
|
diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c
|
2023-03-12 00:55:40 +00:00
|
|
|
index 21a0d246ca501..751b84b6b8fda 100644
|
2023-03-06 23:14:39 +00:00
|
|
|
--- a/drivers/thermal/intel/intel_hfi.c
|
|
|
|
+++ b/drivers/thermal/intel/intel_hfi.c
|
|
|
|
@@ -40,6 +40,7 @@
|
|
|
|
#include <linux/workqueue.h>
|
|
|
|
|
|
|
|
#include <asm/msr.h>
|
|
|
|
+#include <asm/intel-family.h>
|
|
|
|
|
|
|
|
#include "../thermal_core.h"
|
|
|
|
#include "intel_hfi.h"
|
|
|
|
@@ -211,9 +212,64 @@ static int __percpu *hfi_ipcc_scores;
|
|
|
|
*/
|
|
|
|
#define HFI_UNCLASSIFIED_DEFAULT 1
|
|
|
|
|
|
|
|
+#define CLASS_DEBOUNCER_SKIPS 4
|
|
|
|
+
|
|
|
|
+/**
|
|
|
|
+ * debounce_and_update_class() - Process and update a task's classification
|
|
|
|
+ *
|
|
|
|
+ * @p: The task of which the classification will be updated
|
|
|
|
+ * @new_ipcc: The new IPC classification
|
|
|
|
+ *
|
|
|
|
+ * Update the classification of @p with the new value that hardware provides.
|
|
|
|
+ * Only update the classification of @p if it has been the same during
|
|
|
|
+ * CLASS_DEBOUNCER_SKIPS consecutive ticks.
|
|
|
|
+ */
|
|
|
|
+static void debounce_and_update_class(struct task_struct *p, u8 new_ipcc)
|
|
|
|
+{
|
|
|
|
+ u16 debounce_skip;
|
|
|
|
+
|
|
|
|
+ /* The class of @p changed. Only restart the debounce counter. */
|
|
|
|
+ if (p->ipcc_tmp != new_ipcc) {
|
|
|
|
+ p->ipcc_cntr = 1;
|
|
|
|
+ goto out;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ /*
|
|
|
|
+ * The class of @p did not change. Update it if it has been the same
|
|
|
|
+ * for CLASS_DEBOUNCER_SKIPS user ticks.
|
|
|
|
+ */
|
|
|
|
+ debounce_skip = p->ipcc_cntr + 1;
|
|
|
|
+ if (debounce_skip < CLASS_DEBOUNCER_SKIPS)
|
|
|
|
+ p->ipcc_cntr++;
|
|
|
|
+ else
|
|
|
|
+ p->ipcc = new_ipcc;
|
|
|
|
+
|
|
|
|
+out:
|
|
|
|
+ p->ipcc_tmp = new_ipcc;
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+static bool classification_is_accurate(u8 hfi_class, bool smt_siblings_idle)
|
|
|
|
+{
|
|
|
|
+ switch (boot_cpu_data.x86_model) {
|
|
|
|
+ case INTEL_FAM6_ALDERLAKE:
|
|
|
|
+ case INTEL_FAM6_ALDERLAKE_L:
|
|
|
|
+ case INTEL_FAM6_RAPTORLAKE:
|
|
|
|
+ case INTEL_FAM6_RAPTORLAKE_P:
|
|
|
|
+ case INTEL_FAM6_RAPTORLAKE_S:
|
|
|
|
+ if (hfi_class == 3 || hfi_class == 2 || smt_siblings_idle)
|
|
|
|
+ return true;
|
|
|
|
+
|
|
|
|
+ return false;
|
|
|
|
+
|
|
|
|
+ default:
|
|
|
|
+ return true;
|
|
|
|
+ }
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
void intel_hfi_update_ipcc(struct task_struct *curr)
|
|
|
|
{
|
|
|
|
union hfi_thread_feedback_char_msr msr;
|
|
|
|
+ bool idle;
|
|
|
|
|
|
|
|
/* We should not be here if ITD is not supported. */
|
|
|
|
if (!cpu_feature_enabled(X86_FEATURE_ITD)) {
|
|
|
|
@@ -229,7 +285,9 @@ void intel_hfi_update_ipcc(struct task_struct *curr)
|
|
|
|
* 0 is a valid classification for Intel Thread Director. A scheduler
|
|
|
|
* IPCC class of 0 means that the task is unclassified. Adjust.
|
|
|
|
*/
|
|
|
|
- curr->ipcc = msr.split.classid + 1;
|
|
|
|
+ idle = sched_smt_siblings_idle(task_cpu(curr));
|
|
|
|
+ if (classification_is_accurate(msr.split.classid, idle))
|
|
|
|
+ debounce_and_update_class(curr, msr.split.classid + 1);
|
|
|
|
}
|
|
|
|
|
|
|
|
unsigned long intel_hfi_get_ipcc_score(unsigned short ipcc, int cpu)
|
|
|
|
--
|
|
|
|
2.39.2
|
|
|
|
|
2023-03-12 00:55:40 +00:00
|
|
|
From 0ae9abd0c6e2109c538599229c213539fb1386ca Mon Sep 17 00:00:00 2001
|
2023-03-06 23:14:39 +00:00
|
|
|
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
|
|
|
|
Date: Mon, 6 Feb 2023 21:11:03 -0800
|
|
|
|
Subject: [PATCH] x86/cpufeatures: Add feature bit for HRESET
|
|
|
|
|
|
|
|
The HRESET instruction prevents the classification of the current task
|
|
|
|
from influencing the classification of the next task when running serially
|
|
|
|
on the same logical processor.
|
|
|
|
|
|
|
|
Cc: Ben Segall <bsegall@google.com>
|
|
|
|
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
|
|
|
|
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
|
|
|
|
Cc: Ionela Voinescu <ionela.voinescu@arm.com>
|
|
|
|
Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
|
|
|
|
Cc: Len Brown <len.brown@intel.com>
|
|
|
|
Cc: Lukasz Luba <lukasz.luba@arm.com>
|
|
|
|
Cc: Mel Gorman <mgorman@suse.de>
|
|
|
|
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
|
|
|
|
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
|
|
|
|
Cc: Steven Rostedt <rostedt@goodmis.org>
|
|
|
|
Cc: Tim C. Chen <tim.c.chen@intel.com>
|
|
|
|
Cc: Valentin Schneider <vschneid@redhat.com>
|
|
|
|
Cc: x86@kernel.org
|
|
|
|
Cc: linux-pm@vger.kernel.org
|
|
|
|
Cc: linux-kernel@vger.kernel.org
|
|
|
|
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
|
|
|
|
Patchset: intel-thread-director
|
|
|
|
---
|
|
|
|
arch/x86/include/asm/cpufeatures.h | 1 +
|
|
|
|
arch/x86/include/asm/msr-index.h | 4 +++-
|
|
|
|
arch/x86/kernel/cpu/scattered.c | 1 +
|
|
|
|
3 files changed, 5 insertions(+), 1 deletion(-)
|
|
|
|
|
|
|
|
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
|
2023-03-12 00:55:40 +00:00
|
|
|
index 02f10cd5c7536..2fc261302f5cc 100644
|
2023-03-06 23:14:39 +00:00
|
|
|
--- a/arch/x86/include/asm/cpufeatures.h
|
|
|
|
+++ b/arch/x86/include/asm/cpufeatures.h
|
|
|
|
@@ -307,6 +307,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
#define X86_FEATURE_MSR_TSX_CTRL (11*32+20) /* "" MSR IA32_TSX_CTRL (Intel) implemented */
|
|
|
|
+#define X86_FEATURE_HRESET (11*32+23) /* Hardware history reset instruction */
|
|
|
|
|
|
|
|
/* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
|
|
|
|
#define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */
|
|
|
|
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
|
2023-03-12 00:55:40 +00:00
|
|
|
index b4a5de303b88a..4ef01ce8eadb5 100644
|
2023-03-06 23:14:39 +00:00
|
|
|
--- a/arch/x86/include/asm/msr-index.h
|
|
|
|
+++ b/arch/x86/include/asm/msr-index.h
|
2023-03-11 22:54:06 +00:00
|
|
|
@@ -1092,6 +1092,9 @@
|
2023-03-06 23:14:39 +00:00
|
|
|
#define MSR_IA32_HW_FEEDBACK_THREAD_CONFIG 0x17d4
|
|
|
|
#define MSR_IA32_HW_FEEDBACK_CHAR 0x17d2
|
|
|
|
|
|
|
|
+/* Hardware History Reset */
|
|
|
|
+#define MSR_IA32_HW_HRESET_ENABLE 0x17da
|
|
|
|
+
|
|
|
|
/* x2APIC locked status */
|
|
|
|
#define MSR_IA32_XAPIC_DISABLE_STATUS 0xBD
|
|
|
|
#define LEGACY_XAPIC_DISABLED BIT(0) /*
|
2023-03-11 22:54:06 +00:00
|
|
|
@@ -1099,5 +1102,4 @@
|
2023-03-06 23:14:39 +00:00
|
|
|
* disabling x2APIC will cause
|
|
|
|
* a #GP
|
|
|
|
*/
|
|
|
|
-
|
|
|
|
#endif /* _ASM_X86_MSR_INDEX_H */
|
|
|
|
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
|
2023-03-12 00:55:40 +00:00
|
|
|
index fc01f81f6e2a3..b8ad312d36949 100644
|
2023-03-06 23:14:39 +00:00
|
|
|
--- a/arch/x86/kernel/cpu/scattered.c
|
|
|
|
+++ b/arch/x86/kernel/cpu/scattered.c
|
|
|
|
@@ -28,6 +28,7 @@ static const struct cpuid_bit cpuid_bits[] = {
|
|
|
|
{ X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 },
|
|
|
|
{ X86_FEATURE_INTEL_PPIN, CPUID_EBX, 0, 0x00000007, 1 },
|
|
|
|
{ X86_FEATURE_RRSBA_CTRL, CPUID_EDX, 2, 0x00000007, 2 },
|
|
|
|
+ { X86_FEATURE_HRESET, CPUID_EAX, 22, 0x00000007, 1 },
|
|
|
|
{ X86_FEATURE_CQM_LLC, CPUID_EDX, 1, 0x0000000f, 0 },
|
|
|
|
{ X86_FEATURE_CQM_OCCUP_LLC, CPUID_EDX, 0, 0x0000000f, 1 },
|
|
|
|
{ X86_FEATURE_CQM_MBM_TOTAL, CPUID_EDX, 1, 0x0000000f, 1 },
|
|
|
|
--
|
|
|
|
2.39.2
|
|
|
|
|
2023-03-12 00:55:40 +00:00
|
|
|
From dcf0f4ab904b1ef3090f2de141f55ff42bf9b0f2 Mon Sep 17 00:00:00 2001
|
2023-03-06 23:14:39 +00:00
|
|
|
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
|
|
|
|
Date: Mon, 6 Feb 2023 21:11:04 -0800
|
|
|
|
Subject: [PATCH] x86/hreset: Configure history reset
|
|
|
|
|
|
|
|
Configure the MSR that controls the behavior of HRESET on each logical
|
|
|
|
processor.
|
|
|
|
|
|
|
|
Cc: Ben Segall <bsegall@google.com>
|
|
|
|
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
|
|
|
|
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
|
|
|
|
Cc: Ionela Voinescu <ionela.voinescu@arm.com>
|
|
|
|
Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
|
|
|
|
Cc: Len Brown <len.brown@intel.com>
|
|
|
|
Cc: Lukasz Luba <lukasz.luba@arm.com>
|
|
|
|
Cc: Mel Gorman <mgorman@suse.de>
|
|
|
|
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
|
|
|
|
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
|
|
|
|
Cc: Steven Rostedt <rostedt@goodmis.org>
|
|
|
|
Cc: Tim C. Chen <tim.c.chen@intel.com>
|
|
|
|
Cc: Valentin Schneider <vschneid@redhat.com>
|
|
|
|
Cc: x86@kernel.org
|
|
|
|
Cc: linux-pm@vger.kernel.org
|
|
|
|
Cc: linux-kernel@vger.kernel.org
|
|
|
|
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
|
|
|
|
Patchset: intel-thread-director
|
|
|
|
---
|
|
|
|
arch/x86/kernel/cpu/common.c | 23 ++++++++++++++++++++++-
|
|
|
|
1 file changed, 22 insertions(+), 1 deletion(-)
|
|
|
|
|
|
|
|
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
|
2023-03-12 00:55:40 +00:00
|
|
|
index c34bdba57993a..98b48cf80ec21 100644
|
2023-03-06 23:14:39 +00:00
|
|
|
--- a/arch/x86/kernel/cpu/common.c
|
|
|
|
+++ b/arch/x86/kernel/cpu/common.c
|
|
|
|
@@ -411,6 +411,26 @@ static __always_inline void setup_umip(struct cpuinfo_x86 *c)
|
|
|
|
cr4_clear_bits(X86_CR4_UMIP);
|
|
|
|
}
|
|
|
|
|
|
|
|
+static u32 hardware_history_features __ro_after_init;
|
|
|
|
+
|
|
|
|
+static __always_inline void setup_hreset(struct cpuinfo_x86 *c)
|
|
|
|
+{
|
|
|
|
+ if (!cpu_feature_enabled(X86_FEATURE_HRESET))
|
|
|
|
+ return;
|
|
|
|
+
|
|
|
|
+ /*
|
|
|
|
+ * Use on all CPUs the hardware history features that the boot
|
|
|
|
+ * CPU supports.
|
|
|
|
+ */
|
|
|
|
+ if (c == &boot_cpu_data)
|
|
|
|
+ hardware_history_features = cpuid_ebx(0x20);
|
|
|
|
+
|
|
|
|
+ if (!hardware_history_features)
|
|
|
|
+ return;
|
|
|
|
+
|
|
|
|
+ wrmsrl(MSR_IA32_HW_HRESET_ENABLE, hardware_history_features);
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
/* These bits should not change their value after CPU init is finished. */
|
|
|
|
static const unsigned long cr4_pinned_mask =
|
|
|
|
X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_UMIP |
|
|
|
|
@@ -1828,10 +1848,11 @@ static void identify_cpu(struct cpuinfo_x86 *c)
|
|
|
|
/* Disable the PN if appropriate */
|
|
|
|
squash_the_stupid_serial_number(c);
|
|
|
|
|
|
|
|
- /* Set up SMEP/SMAP/UMIP */
|
|
|
|
+ /* Set up SMEP/SMAP/UMIP/HRESET */
|
|
|
|
setup_smep(c);
|
|
|
|
setup_smap(c);
|
|
|
|
setup_umip(c);
|
|
|
|
+ setup_hreset(c);
|
|
|
|
|
|
|
|
/* Enable FSGSBASE instructions if available. */
|
|
|
|
if (cpu_has(c, X86_FEATURE_FSGSBASE)) {
|
|
|
|
--
|
|
|
|
2.39.2
|
|
|
|
|
2023-03-12 00:55:40 +00:00
|
|
|
From d562f8856447bb5019dad771dab745821b383939 Mon Sep 17 00:00:00 2001
|
2023-03-06 23:14:39 +00:00
|
|
|
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
|
|
|
|
Date: Mon, 6 Feb 2023 21:11:05 -0800
|
|
|
|
Subject: [PATCH] x86/process: Reset hardware history in context switch
|
|
|
|
|
|
|
|
Reset the classification history of the current task when switching to the
|
|
|
|
next task. Hardware will start the classification of the next task from
|
|
|
|
scratch.
|
|
|
|
|
|
|
|
Cc: Ben Segall <bsegall@google.com>
|
|
|
|
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
|
|
|
|
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
|
|
|
|
Cc: Ionela Voinescu <ionela.voinescu@arm.com>
|
|
|
|
Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
|
|
|
|
Cc: Len Brown <len.brown@intel.com>
|
|
|
|
Cc: Lukasz Luba <lukasz.luba@arm.com>
|
|
|
|
Cc: Mel Gorman <mgorman@suse.de>
|
|
|
|
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
|
|
|
|
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
|
|
|
|
Cc: Steven Rostedt <rostedt@goodmis.org>
|
|
|
|
Cc: Tim C. Chen <tim.c.chen@intel.com>
|
|
|
|
Cc: Valentin Schneider <vschneid@redhat.com>
|
|
|
|
Cc: x86@kernel.org
|
|
|
|
Cc: linux-pm@vger.kernel.org
|
|
|
|
Cc: linux-kernel@vger.kernel.org
|
|
|
|
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
|
|
|
|
Patchset: intel-thread-director
|
|
|
|
---
|
|
|
|
arch/x86/include/asm/hreset.h | 30 ++++++++++++++++++++++++++++++
|
|
|
|
arch/x86/kernel/cpu/common.c | 7 +++++++
|
|
|
|
arch/x86/kernel/process_32.c | 3 +++
|
|
|
|
arch/x86/kernel/process_64.c | 3 +++
|
|
|
|
4 files changed, 43 insertions(+)
|
|
|
|
create mode 100644 arch/x86/include/asm/hreset.h
|
|
|
|
|
|
|
|
diff --git a/arch/x86/include/asm/hreset.h b/arch/x86/include/asm/hreset.h
|
|
|
|
new file mode 100644
|
2023-03-12 00:55:40 +00:00
|
|
|
index 0000000000000..d68ca2fb8642b
|
2023-03-06 23:14:39 +00:00
|
|
|
--- /dev/null
|
|
|
|
+++ b/arch/x86/include/asm/hreset.h
|
|
|
|
@@ -0,0 +1,30 @@
|
|
|
|
+/* SPDX-License-Identifier: GPL-2.0 */
|
|
|
|
+#ifndef _ASM_X86_HRESET_H
|
|
|
|
+
|
|
|
|
+/**
|
|
|
|
+ * HRESET - History reset. Available since binutils v2.36.
|
|
|
|
+ *
|
|
|
|
+ * Request the processor to reset the history of task classification on the
|
|
|
|
+ * current logical processor. The history components to be
|
|
|
|
+ * reset are specified in %eax. Only bits specified in CPUID(0x20).EBX
|
|
|
|
+ * and enabled in the IA32_HRESET_ENABLE MSR can be selected.
|
|
|
|
+ *
|
|
|
|
+ * The assembly code looks like:
|
|
|
|
+ *
|
|
|
|
+ * hreset %eax
|
|
|
|
+ *
|
|
|
|
+ * The corresponding machine code looks like:
|
|
|
|
+ *
|
|
|
|
+ * F3 0F 3A F0 ModRM Imm
|
|
|
|
+ *
|
|
|
|
+ * The value of ModRM is 0xc0 to specify %eax register addressing.
|
|
|
|
+ * The ignored immediate operand is set to 0.
|
|
|
|
+ *
|
|
|
|
+ * The instruction is documented in the Intel SDM.
|
|
|
|
+ */
|
|
|
|
+
|
|
|
|
+#define __ASM_HRESET ".byte 0xf3, 0xf, 0x3a, 0xf0, 0xc0, 0x0"
|
|
|
|
+
|
|
|
|
+void reset_hardware_history(void);
|
|
|
|
+
|
|
|
|
+#endif /* _ASM_X86_HRESET_H */
|
|
|
|
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
|
2023-03-12 00:55:40 +00:00
|
|
|
index 98b48cf80ec21..67bf6fbae082a 100644
|
2023-03-06 23:14:39 +00:00
|
|
|
--- a/arch/x86/kernel/cpu/common.c
|
|
|
|
+++ b/arch/x86/kernel/cpu/common.c
|
|
|
|
@@ -52,6 +52,7 @@
|
|
|
|
#include <asm/cpu.h>
|
|
|
|
#include <asm/mce.h>
|
|
|
|
#include <asm/msr.h>
|
|
|
|
+#include <asm/hreset.h>
|
|
|
|
#include <asm/memtype.h>
|
|
|
|
#include <asm/microcode.h>
|
|
|
|
#include <asm/microcode_intel.h>
|
|
|
|
@@ -413,6 +414,12 @@ static __always_inline void setup_umip(struct cpuinfo_x86 *c)
|
|
|
|
|
|
|
|
static u32 hardware_history_features __ro_after_init;
|
|
|
|
|
|
|
|
+void reset_hardware_history(void)
|
|
|
|
+{
|
|
|
|
+ asm_inline volatile (ALTERNATIVE("", __ASM_HRESET, X86_FEATURE_HRESET)
|
|
|
|
+ : : "a" (hardware_history_features) : "memory");
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
static __always_inline void setup_hreset(struct cpuinfo_x86 *c)
|
|
|
|
{
|
|
|
|
if (!cpu_feature_enabled(X86_FEATURE_HRESET))
|
|
|
|
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
|
2023-03-12 00:55:40 +00:00
|
|
|
index ceab14b6118f7..888cdb7624dcd 100644
|
2023-03-06 23:14:39 +00:00
|
|
|
--- a/arch/x86/kernel/process_32.c
|
|
|
|
+++ b/arch/x86/kernel/process_32.c
|
|
|
|
@@ -52,6 +52,7 @@
|
|
|
|
#include <asm/switch_to.h>
|
|
|
|
#include <asm/vm86.h>
|
|
|
|
#include <asm/resctrl.h>
|
|
|
|
+#include <asm/hreset.h>
|
|
|
|
#include <asm/proto.h>
|
|
|
|
|
|
|
|
#include "process.h"
|
|
|
|
@@ -214,6 +215,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
|
|
|
|
/* Load the Intel cache allocation PQR MSR. */
|
2023-03-11 22:54:06 +00:00
|
|
|
resctrl_sched_in(next_p);
|
2023-03-06 23:14:39 +00:00
|
|
|
|
|
|
|
+ reset_hardware_history();
|
|
|
|
+
|
|
|
|
return prev_p;
|
|
|
|
}
|
|
|
|
|
|
|
|
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
|
2023-03-12 00:55:40 +00:00
|
|
|
index 7f94dbbc397b7..d0d3c33237475 100644
|
2023-03-06 23:14:39 +00:00
|
|
|
--- a/arch/x86/kernel/process_64.c
|
|
|
|
+++ b/arch/x86/kernel/process_64.c
|
|
|
|
@@ -53,6 +53,7 @@
|
|
|
|
#include <asm/xen/hypervisor.h>
|
|
|
|
#include <asm/vdso.h>
|
|
|
|
#include <asm/resctrl.h>
|
|
|
|
+#include <asm/hreset.h>
|
|
|
|
#include <asm/unistd.h>
|
|
|
|
#include <asm/fsgsbase.h>
|
|
|
|
#ifdef CONFIG_IA32_EMULATION
|
|
|
|
@@ -658,6 +659,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
|
|
|
|
/* Load the Intel cache allocation PQR MSR. */
|
2023-03-11 22:54:06 +00:00
|
|
|
resctrl_sched_in(next_p);
|
2023-03-06 23:14:39 +00:00
|
|
|
|
|
|
|
+ reset_hardware_history();
|
|
|
|
+
|
|
|
|
return prev_p;
|
|
|
|
}
|
|
|
|
|
|
|
|
--
|
|
|
|
2.39.2
|
|
|
|
|