On Wed, 6 Mar 2024, Maciej Wieczor-Retman wrote:
Memory Bandwidth Monitoring (MBM) measures how much data flows between cache levels. Only the flow for a process specified with a Resource Monitoring ID (RMID) is measured.
Sub-Numa Clustering (SNC) causes MBM selftest to fail because the increased amount of NUMA nodes per socket is not taken into account. That in turn makes the test use incorrect values for the start and end of the measurement by tracking the wrong node.
For the MBM selftest to be NUMA-aware it needs to track the start and end values of a measurement not for a single node but for all nodes on the same socket. Then summing all measured values comes out as the real bandwidth used by the process.
Reported-by: "Shaopeng Tan (Fujitsu)" tan.shaopeng@fujitsu.com Closes: https://lore.kernel.org/lkml/TYAPR01MB6330A4EB3633B791939EA45E8B39A@TYAPR01M... Signed-off-by: Maciej Wieczor-Retman maciej.wieczor-retman@intel.com
tools/testing/selftests/resctrl/mba_test.c | 1 - tools/testing/selftests/resctrl/resctrl_val.c | 37 ++++++++++++------- 2 files changed, 23 insertions(+), 15 deletions(-)
diff --git a/tools/testing/selftests/resctrl/mba_test.c b/tools/testing/selftests/resctrl/mba_test.c index 7946e32e85c8..fc31a61dab0c 100644 --- a/tools/testing/selftests/resctrl/mba_test.c +++ b/tools/testing/selftests/resctrl/mba_test.c @@ -147,7 +147,6 @@ static int mba_run_test(const struct resctrl_test *test, const struct user_param struct resctrl_val_param param = { .resctrl_val = MBA_STR, .ctrlgrp = "c1",
.filename = RESULT_FILE_NAME, .bw_report = "reads", .setup = mba_setup.mongrp = "m1",
diff --git a/tools/testing/selftests/resctrl/resctrl_val.c b/tools/testing/selftests/resctrl/resctrl_val.c index e75e3923ebe2..2fe9f8bb4a45 100644 --- a/tools/testing/selftests/resctrl/resctrl_val.c +++ b/tools/testing/selftests/resctrl/resctrl_val.c @@ -595,9 +595,10 @@ static void initialize_llc_occu_resctrl(const char *ctrlgrp, const char *mongrp, static int measure_vals(const struct user_params *uparams, struct resctrl_val_param *param,
unsigned long *bw_resc_start)
unsigned long *bw_resc_start,
int res_id)
{
- unsigned long bw_resc, bw_resc_end;
- unsigned long bw_resc = 0, bw_resc_sum = 0, bw_resc_end; float bw_imc; int ret;
@@ -612,17 +613,19 @@ static int measure_vals(const struct user_params *uparams, if (ret < 0) return ret;
- ret = get_mem_bw_resctrl(&bw_resc_end);
- if (ret < 0)
return ret;
- for (int i = 0 ; i < snc_ways() ; i++) {
set_mbm_path(param->ctrlgrp, strlen(param->mongrp) > 0 ? param->mongrp : NULL,
res_id + i);
ret = get_mem_bw_resctrl(&bw_resc_end);
bw_resc = (bw_resc_end - bw_resc_start[i]) / MB;
bw_resc_sum += bw_resc;
bw_resc_start[i] = bw_resc_end;
- }
- bw_resc = (bw_resc_end - *bw_resc_start) / MB;
- ret = print_results_bw(param->filename, bm_pid, bw_imc, bw_resc);
- ret = print_results_bw(param->filename, bm_pid, bw_imc, bw_resc_sum); if (ret) return ret;
- *bw_resc_start = bw_resc_end;
- return 0;
} @@ -697,12 +700,16 @@ int resctrl_val(const struct resctrl_test *test, struct resctrl_val_param *param) { char *resctrl_val = param->resctrl_val;
- unsigned long bw_resc_start = 0; int res_id, ret = 0, pipefd[2];
- unsigned long *bw_resc_start; struct sigaction sigact; char pipe_message = 0; union sigval value;
- bw_resc_start = calloc(snc_ways(), sizeof(unsigned long));
While correct, this seems a bit overkill given is MAX_SNC = 4, not something large or unbounded.
This patch would be be much simpler on top of my resctrl_val() cleanup patches because bw_resc_start is only local to the measurement function.