Skip to content

Commit d29de77

Browse files
committed
Merge branch 'ckelly_develop' of https://github.com/CODARcode/PerformanceAnalysis into ckelly_develop
2 parents ea53d6e + c6d7e7f commit d29de77

File tree

1 file changed

+61
-66
lines changed

1 file changed

+61
-66
lines changed

scripts/summit/gen_erf_summit.sh

Lines changed: 61 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -1,86 +1,67 @@
11
#!/usr/bin/perl
22

3-
#A script to generate ERF files for Summit job placement
4-
5-
###################################### USER SHOULD EDIT ###################################################################################
6-
$nhosts=17; #should be the number of hosts (nodes) that the application will run on, plus one for the provDB and pserver. The job script should allocate this many nodes
7-
$nranks_per_host=6; #number of MPI ranks per host (node) for the application
3+
if(scalar @ARGV != 5){
4+
print "Expect: <total number of nodes> <ranks per node> <cores per rank main> <gpus per rank main> <cores per host for AD>\n";
5+
exit 1;
6+
}
87

9-
#NB total ranks is ($nhosts -1) * $nranks_per_host
108

11-
$ncores_per_rank_ad=1; #the number of cores to assign per MPI rank to the AD instances. Remaining cores on socket are divided evenly over the ranks of the application
12-
$ngpus_per_rank_main=1; #number of GPUs per MPI rank for the application
13-
$use_provdb=1; #disable provdb if 0
14-
###################################### END OF USER EDITED SECTION ###################################################################################
9+
$nhosts=$ARGV[0]; #should be the number of hosts (nodes) that the application will run on, plus one for the provDB and pserver. The job script should allocate this many nodes
10+
$nranks_per_host=${ARGV[1]}; #number of MPI ranks per host (node) for the application
11+
$ncores_per_rank_main=${ARGV[2]}; #the number of cores to assign per MPI rank to the main app
12+
$ngpus_per_rank_main=${ARGV[3]}; #number of GPUs per MPI rank for the application
13+
$ncores_per_host_ad = ${ARGV[4]}; #number of cores to reserve for Chimbuko AD. Should be divisible by 2 to divide over the 2 sockets
14+
$nthreads_per_rank_ad = 4; #The number of hardware threads per rank of the AD. There are 4 hardware threads per core, hence if this is set to 4 the AD will have an entire core per rank.
1515

1616
if($ngpus_per_rank_main * $nranks_per_host > 6){
1717
print "Error: too many GPUs per rank\n";
1818
exit;
1919
}
2020

21-
$nranks_total = ($nhosts -1) * $nranks_per_host;
22-
print "Generating ERF files for ${nranks_total} ranks\n";
23-
2421
#Summit characteristics
2522
$ncores_host=42; #cores per node
2623
$ncores_socket=21; #cores per socket
24+
$nthreads_core=4; #hardware threads per core
2725
$socket_offset_core = 88; #core offset of second socket: 22nd physical core on each socket reserved for OS
28-
$socket_offset_gpu = 3; #4 GPUs per socket
29-
30-
#Do the assignment per socket rather than per node to prevent splitting of ranks across sockets
31-
32-
#Pserver and provdb
33-
if($use_provdb){
34-
#1 socket each
35-
open(OUT, ">provdb.erf");
36-
print OUT "cpu_index_using: physical\n";
37-
$corestart_provdb = 0;
38-
$coreend_provdb = 4*$ncores_socket - 1;
39-
print OUT "rank: 0: { host: 1; cpu: {${corestart_provdb}-${coreend_provdb}} } : app 0\n"; # ; mem: *
40-
close(OUT);
41-
42-
open(OUT, ">pserver.erf");
43-
print OUT "cpu_index_using: physical\n";
44-
$corestart_pserver = $socket_offset_core;
45-
$coreend_pserver = $corestart_pserver + 4*$ncores_socket-1;
46-
print OUT "rank: 0: { host: 1; cpu: {${corestart_pserver}-${coreend_pserver}} } : app 0\n";
47-
close(OUT);
48-
}else{
49-
#Both sockets to pserver
50-
$ncores_provdb=$ncores_socket;
51-
$ncores_pserver=$ncores_socket;
52-
53-
open(OUT, ">pserver.erf");
54-
print OUT "cpu_index_using: physical\n";
55-
56-
$corestart_pserver = 0;
57-
$coreend_pserver = 4*$ncores_socket - 1;
58-
59-
$corestart2_pserver = $socket_offset_core;
60-
$coreend2_pserver = $corestart2_pserver + 4*$ncores_socket-1;
61-
print OUT "rank: 0: { host: 1; cpu: {${corestart_pserver}-${coreend_pserver},${corestart2_pserver}-${coreend2_pserver}} } : app 0\n"; # ; mem: *
62-
close(OUT);
63-
}
26+
$socket_offset_gpu = 3; #3 GPUs per socket
27+
28+
#Generate ERF for Chimbuko services
29+
open(OUT, ">services.erf");
30+
print OUT "cpu_index_using: physical\n";
31+
$corestart_socket0 = 0;
32+
$coreend_socket0 = 4*$ncores_socket - 1;
33+
$corestart_socket1 = $socket_offset_core;
34+
$coreend_socket1 = $socket_offset_core + 4*$ncores_socket - 1;
35+
print OUT "rank: 0: { host: 1; cpu: {${corestart_socket0}-${coreend_socket0}},{${corestart_socket1}-${coreend_socket1}} } : app 0\n"; # ; mem: *
36+
close(OUT);
6437

65-
if($nranks_per_socket % 2 != 0){
38+
#Generate ERF for AD and main application
39+
if($nranks_per_host % 2 != 0){
6640
print "Expect number of ranks to be a multiple of 2!";
6741
exit;
6842
}
43+
if($ncores_per_host_ad % 2 != 0){
44+
print "Expect number of host cores for the AD to be a multiple of 2!";
45+
exit;
46+
}
6947

7048
$nranks_per_socket = ${nranks_per_host}/2;
71-
$ncores_per_socket_main = $ncores_socket - $nranks_per_socket * $ncores_per_rank_ad;
72-
$ncores_per_rank_main = 0;
73-
{
74-
use integer;
75-
$ncores_per_rank_main = $ncores_per_socket_main / $nranks_per_socket;
49+
$ncores_per_socket_ad = ${ncores_per_host_ad}/2;
50+
$ncores_per_socket_main = ${ncores_per_rank_main} * ${nranks_per_socket};
51+
52+
if($ncores_per_socket_main + $ncores_per_socket_ad > $ncores_socket){
53+
print "Too many cores per socket!";
54+
exit;
7655
}
77-
print "Assigning ${ncores_per_rank_main} cores per rank to main program\n";
56+
57+
print "Assigning ${ncores_per_rank_main} cores per rank to main program, ${ncores_per_socket_main} cores per socket.\n";
58+
print "Assigning ${ncores_per_socket_ad} cores per socket to the AD\n";
7859

7960
$nhosts_job = $nhosts-1;
8061
$hoststart_job = 2; #0 is launch node, 1 is first compute node
8162

8263
open(OUT, ">ad.erf");
83-
print OUT "cpu_index_using: physical\n";
64+
print OUT "cpu_index_using: physical\noverlapping-rs : allow\noversubscribe-cpu : allow\noversubscribe-mem : allow\n";
8465

8566
open(OUT2, ">main.erf");
8667
print OUT2 "cpu_index_using: physical\n";
@@ -92,27 +73,41 @@ for($h=0;$h<$nhosts_job;$h++){
9273
$rank_off = $h*$nranks_per_host + $s * $nranks_per_socket;
9374

9475
$corestart=$s*$socket_offset_core;
95-
$gpustart = $s*$socket_offset_gpu;
9676

97-
#Assign first cores to AD
77+
#AD shares the same resource set for all ranks
78+
#However we need to manually assign the ranks to cores to prevent all ranks being piled on the first core
79+
$coreend = $corestart + 4*${ncores_per_socket_ad} - 1;
80+
9881
for($r=0;$r<$nranks_per_socket;$r++){
9982
$rank = $r + $rank_off;
100-
$coreend = $corestart + 4*$ncores_per_rank_ad -1;
101-
print OUT "rank: ${rank}: { host: ${host}; cpu: {${corestart}-${coreend}} } : app 0\n";
102-
103-
$corestart = $coreend+1;
83+
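$rank_corestart = ( ($nthreads_per_rank_ad*$r) % (4*${ncores_per_socket_ad}) ) + ${corestart}; #round-robin: each rank gets $nthreads_per_rank_ad hardware threads, wrapping around within the AD's hardware threads on this socket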
84+
$rank_coreend = $rank_corestart + $nthreads_per_rank_ad - 1;
85+
if($nthreads_per_rank_ad == 1){
86+
print OUT "rank: ${rank}: { host: ${host}; cpu: {${rank_corestart}} } : app 0\n";
87+
}else{
88+
print OUT "rank: ${rank}: { host: ${host}; cpu: {${rank_corestart}-${rank_coreend}} } : app 0\n";
89+
}
10490
}
91+
92+
93+
94+
$gpustart = $s*$socket_offset_gpu;
95+
$corestart = $coreend+1;
96+
10597
#Then to main
10698
for($r=0;$r<$nranks_per_socket;$r++){
10799
$rank = $r + $rank_off;
108100
$coreend = $corestart + 4*$ncores_per_rank_main -1;
109101

110102
$gpuend = $gpustart + $ngpus_per_rank_main - 1;
111-
$gpu_str = "$gpustart - $gpu_end";
103+
$gpu_str = "; gpu: {$gpustart - $gpuend}"; #GPU range for this rank; uses $gpuend computed just above (was the undefined $gpu_end, which interpolated as empty). Overridden below when the rank has exactly 1 or 0 GPUs.
112104
if($ngpus_per_rank_main == 1){
113-
$gpu_str = "$gpustart";
105+
$gpu_str = "; gpu: {$gpustart}";
106+
}elsif($ngpus_per_rank_main == 0){
107+
$gpu_str = "";
114108
}
115-
print OUT2 "rank: ${rank}: { host: ${host}; cpu: {${corestart}-${coreend}} ; gpu: {${gpu_str}} } : app 0\n";
109+
110+
print OUT2 "rank: ${rank}: { host: ${host}; cpu: {${corestart}-${coreend}}${gpu_str} } : app 0\n";
116111

117112
$gpustart = $gpuend + 1;
118113
$corestart = $coreend+1;

0 commit comments

Comments
 (0)