Skip to content

Commit 20dd0bc

Browse files
[Benchmarks] Pin benchmarks to small set of cores (#20403)
For better results stability, pin benchmark binaries to four cores with the maximum available frequency. --------- Co-authored-by: Łukasz Ślusarczyk <[email protected]>
1 parent 3eff452 commit 20dd0bc

File tree

5 files changed

+30
-25
lines changed

5 files changed

+30
-25
lines changed

devops/actions/run-tests/benchmark/action.yml

Lines changed: 7 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -82,25 +82,13 @@ runs:
8282
python3 ./devops/scripts/benchmarks/presets.py query "$PRESET"
8383
[ "$?" -ne 0 ] && exit 1 # Stop workflow if invalid preset
8484
echo "PRESET=$PRESET" >> $GITHUB_ENV
85-
- name: Compute CPU core range to run benchmarks on
85+
- name: Set NUMA node to run benchmarks on
8686
shell: bash
8787
run: |
88-
# Compute the core range for the first NUMA node; second node is used by
89-
# UMF. Skip the first 4 cores as the kernel is likely to schedule more
90-
# work on these.
91-
CORES="$(lscpu | awk '
92-
/NUMA node0 CPU|On-line CPU/ {line=$0}
93-
END {
94-
split(line, a, " ")
95-
split(a[4], b, ",")
96-
sub(/^0/, "4", b[1])
97-
print b[1]
98-
}')"
99-
echo "CPU core range to use: $CORES"
100-
echo "CORES=$CORES" >> $GITHUB_ENV
101-
102-
ZE_AFFINITY_MASK=0
103-
echo "ZE_AFFINITY_MASK=$ZE_AFFINITY_MASK" >> $GITHUB_ENV
88+
# Set CPU and GPU affinity for the first NUMA node; second node is used by UMF
89+
NUMA_NODE=0
90+
echo "ZE_AFFINITY_MASK=$NUMA_NODE" >> $GITHUB_ENV
91+
echo "NUMA_NODE=$NUMA_NODE" >> $GITHUB_ENV
10492
10593
# Compute-benchmarks relies on UR static libraries, cmake config files, etc.
10694
# DPC++ doesn't ship with these files. The easiest way of obtaining these
@@ -243,7 +231,8 @@ runs:
243231
WORKDIR="$(realpath ./llvm_test_workdir)"
244232
if [ -n "$WORKDIR" ] && [ -d "$WORKDIR" ] && [[ "$WORKDIR" == *llvm_test_workdir* ]]; then rm -rf "$WORKDIR" ; fi
245233

246-
taskset -c "$CORES" ./devops/scripts/benchmarks/main.py "$WORKDIR" \
234+
numactl --cpunodebind "$NUMA_NODE" --membind "$NUMA_NODE" \
235+
./devops/scripts/benchmarks/main.py "$WORKDIR" \
247236
--sycl "$(realpath ./toolchain)" \
248237
--ur "$(realpath ./ur/install)" \
249238
--adapter "$FORCELOAD_ADAPTER" \

devops/scripts/benchmarks/benches/base.py

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,15 +6,18 @@
66
import os
77
import shutil
88
import subprocess
9-
from pathlib import Path
9+
from abc import ABC, abstractmethod
1010
from enum import Enum
11-
from utils.result import BenchmarkMetadata, BenchmarkTag, Result
11+
from pathlib import Path
12+
13+
from psutil import Process
14+
1215
from options import options
13-
from utils.utils import download, run
14-
from abc import ABC, abstractmethod
15-
from utils.unitrace import get_unitrace
1616
from utils.flamegraph import get_flamegraph
1717
from utils.logger import log
18+
from utils.result import BenchmarkMetadata, BenchmarkTag, Result
19+
from utils.unitrace import get_unitrace
20+
from utils.utils import download, run
1821

1922

2023
class TracingType(Enum):
@@ -151,6 +154,8 @@ def run_bench(
151154
log.debug(f"FlameGraph perf data: {perf_data_file}")
152155
log.debug(f"FlameGraph command: {' '.join(command)}")
153156

157+
command = self.taskset_cmd() + command
158+
154159
try:
155160
result = run(
156161
command=command,
@@ -252,6 +257,16 @@ def get_metadata(self) -> dict[str, BenchmarkMetadata]:
252257
)
253258
}
254259

260+
def taskset_cmd(self, max_cores: int = 4) -> list[str]:
    """Return the ``taskset`` command prefix used for CPU core pinning.

    Compute benchmarks are pinned to a small set of CPU cores to ensure
    consistent results and non-zero CPU count measurements (e.g. to avoid
    E-cores). The cores are the first ``max_cores`` entries of this
    process's CPU affinity list, so fewer cores are selected when the
    process is allowed to run on fewer than ``max_cores`` cores. It is
    assumed that the selected cores have the maximum, or at least similar,
    frequency.

    Args:
        max_cores: Upper bound on the number of cores to pin to. Defaults
            to 4 to satisfy multi-threaded benchmarks.

    Returns:
        ``["taskset", "-c", "<comma-separated core ids>"]``, meant to be
        prepended to the benchmark command line.

    Raises:
        ValueError: If ``max_cores`` is less than 1, which would produce an
            empty core list for ``taskset``.
    """
    if max_cores < 1:
        raise ValueError(f"max_cores must be at least 1, got {max_cores}")

    # Only cores from the current affinity mask are considered, so any
    # restriction already applied to this process is respected.
    allowed_cores = Process().cpu_affinity()  # type: ignore
    core_list = ",".join(str(core) for core in allowed_cores[:max_cores])
    return ["taskset", "-c", core_list]
269+
255270
@staticmethod
256271
def get_adapter_full_path():
257272
for libs_dir_name in ["lib", "lib64"]:

devops/scripts/benchmarks/benches/compute.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
# See LICENSE.TXT
44
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
55

6-
import copy
76
import csv
87
import io
98
import math

devops/scripts/benchmarks/requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,4 @@ mpld3==0.5.10
33
dataclasses-json==0.6.7
44
PyYAML==6.0.1
55
Mako==1.3.0
6+
psutil>=7.0.0

devops/scripts/install_build_tools.sh

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,8 @@ apt update && apt install -yqq \
2828
libzstd-dev \
2929
linux-tools-generic \
3030
linux-tools-common \
31-
time
31+
time \
32+
numactl
3233

3334
# To obtain the latest release of spirv-tools.
3435
# Same as what's done in SPIRV-LLVM-Translator:

0 commit comments

Comments
 (0)