-
Notifications
You must be signed in to change notification settings - Fork 30
Expand file tree
/
Copy pathgres.tf
More file actions
69 lines (68 loc) · 7.42 KB
/
gres.tf
File metadata and controls
69 lines (68 loc) · 7.42 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
locals {
  # GRES "Type" value for every GPU platform, keyed by the platform identifier
  # read from local.platforms (declared outside this block).
  gres_by_platforms = tomap({
    (local.platforms.gpu-h100-sxm)   = "nvidia_h100_80gb_hbm3"
    (local.platforms.gpu-h200-sxm)   = "nvidia_h200"
    (local.platforms.gpu-b200-sxm)   = "nvidia_b200"
    (local.platforms.gpu-b200-sxm-a) = "nvidia_b200"
    (local.platforms.gpu-b300-sxm)   = "nvidia_b300_sxm6_ac"
  })

  # Per-platform list of GRES configuration lines (the key=value syntax looks
  # like Slurm's gres.conf; the consumer of this local is not visible here).
  #
  # CPU platforms get a single "AutoDetect=off" line. GPU platforms get one
  # line per GPU, generated from a small layout table:
  #   devices    - /dev/nvidiaN indices, in the order the lines are emitted.
  #   cores_low  - Cores= range used for the first half of the list.
  #   cores_high - Cores= range used for the second half of the list.
  #
  # The list of GPUs should be sorted by Links field to correspond to the GPU
  # order in nvidia-smi: the line at position i carries "-1" in slot i of
  # Links and "1" in every other slot.
  gres_config_by_platforms = tomap(merge(
    {
      (local.platforms.cpu-e2) = ["AutoDetect=off"]
      (local.platforms.cpu-d3) = ["AutoDetect=off"]
    },
    {
      for platform, layout in {
        (local.platforms.gpu-h100-sxm) = {
          devices    = [4, 5, 6, 7, 0, 1, 2, 3]
          cores_low  = "0-63"
          cores_high = "64-127"
        }
        (local.platforms.gpu-h200-sxm) = {
          devices    = [4, 5, 6, 7, 0, 1, 2, 3]
          cores_low  = "0-63"
          cores_high = "64-127"
        }
        (local.platforms.gpu-b200-sxm) = {
          devices    = [4, 5, 6, 7, 0, 1, 2, 3]
          cores_low  = "0-79"
          cores_high = "80-159"
        }
        (local.platforms.gpu-b200-sxm-a) = {
          devices    = [7, 6, 5, 4, 3, 2, 1, 0]
          cores_low  = "0-79"
          cores_high = "80-159"
        }
        (local.platforms.gpu-b300-sxm) = {
          devices    = [7, 6, 5, 4, 3, 2, 1, 0]
          cores_low  = "0-95"
          cores_high = "96-191"
        }
      } :
      platform => [
        for position, device in layout.devices : join(" ", [
          "AutoDetect=off",
          "Name=gpu",
          "Type=${local.gres_by_platforms[platform]}",
          "File=/dev/nvidia${device}",
          # First half of the list uses the low core range, second half the high one.
          "Cores=${position < length(layout.devices) / 2 ? layout.cores_low : layout.cores_high}",
          # One-hot style row: "-1" at this GPU's own position, "1" elsewhere.
          "Links=${join(",", [for peer in range(length(layout.devices)) : peer == position ? "-1" : "1"])}",
          "Flags=nvidia_gpu_env",
        ])
      ]
    }
  ))
}