forked from EESSI/software-layer
-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathcreate_lmodsitepackage.py
More file actions
executable file
·172 lines (151 loc) · 7.96 KB
/
create_lmodsitepackage.py
File metadata and controls
executable file
·172 lines (151 loc) · 7.96 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
#!/usr/bin/env python3
#
# Create SitePackage.lua configuration file for Lmod.
#
import os
import sys
DOT_LMOD = '.lmod'
hook_txt ="""require("strict")
local hook = require("Hook")
local open = io.open
local function read_file(path)
local file = open(path, "rb") -- r read mode and b binary mode
if not file then return nil end
local content = file:read "*a" -- *a or *all reads the whole file
file:close()
return content
end
local function from_eessi_prefix(t)
-- eessi_prefix is the prefix with official EESSI modules
-- e.g. /cvmfs/software.eessi.io/versions/2023.06
local eessi_prefix = os.getenv("EESSI_PREFIX")
-- If EESSI_PREFIX wasn't defined, we cannot check if this module was from the EESSI environment
-- In that case, we assume it isn't, otherwise EESSI_PREFIX would (probably) have been set
if eessi_prefix == nil then
return False
else
-- NOTE: exact paths for site and user extensions aren't final, so may need to be updated later.
-- See https://github.com/EESSI/software-layer/pull/371
-- eessi_prefix_host_injections is the prefix with site-extensions (i.e. additional modules)
-- to the official EESSI modules, e.g. /cvmfs/software.eessi.io/host_injections/2023.06
local eessi_prefix_host_injections = string.gsub(eessi_prefix, 'versions', 'host_injections')
-- eessi_prefix_user_home is the prefix with user-extensions (i.e. additional modules)
-- to the official EESSI modules, e.g. $HOME/eessi/versions/2023.06
local eessi_prefix_user_home = string.gsub(eessi_prefix, os.getenv("EESSI_CVMFS_REPO"), pathJoin(os.getenv("HOME"), "eessi"))
-- Check if the full modulepath starts with the eessi_prefix_*
return string.find(t.fn, "^" .. eessi_prefix) ~= nil or
string.find(t.fn, "^" .. eessi_prefix_host_injections) ~= nil or
string.find(t.fn, "^" .. eessi_prefix_user_home) ~= nil
end
end
local function eessi_cuda_enabled_load_hook(t)
local frameStk = require("FrameStk"):singleton()
local mt = frameStk:mt()
local simpleName = string.match(t.modFullName, "(.-)/")
-- If we try to load CUDA itself, check if the full CUDA SDK was installed on the host in host_injections.
-- This is required for end users to build additional CUDA software. If the full SDK isn't present, refuse
-- to load the CUDA module and print an informative message on how to set up GPU support for EESSI
local refer_to_docs = "For more information on how to do this, see https://www.eessi.io/docs/gpu/.\\n"
if simpleName == 'CUDA' then
-- get the full host_injections path
local hostInjections = string.gsub(os.getenv('EESSI_SOFTWARE_PATH') or "", 'versions', 'host_injections')
-- build final path where the CUDA software should be installed
local cudaEasyBuildDir = hostInjections .. "/software/" .. t.modFullName .. "/easybuild"
local cudaDirExists = isDir(cudaEasyBuildDir)
if not cudaDirExists then
local advice = "but while the module file exists, the actual software is not entirely shipped with EESSI "
advice = advice .. "due to licencing. You will need to install a full copy of the CUDA SDK where EESSI "
advice = advice .. "can find it.\\n"
advice = advice .. refer_to_docs
LmodError("\\nYou requested to load ", simpleName, " ", advice)
end
end
-- when loading CUDA enabled modules check if the necessary driver libraries are accessible to the EESSI linker,
-- otherwise, refuse to load the requested module and print error message
local haveGpu = mt:haveProperty(simpleName,"arch","gpu")
if haveGpu then
local arch = os.getenv("EESSI_CPU_FAMILY") or ""
local cudaVersionFile = "/cvmfs/software.eessi.io/host_injections/nvidia/" .. arch .. "/latest/cuda_version.txt"
local cudaDriverFile = "/cvmfs/software.eessi.io/host_injections/nvidia/" .. arch .. "/latest/libcuda.so"
local cudaDriverExists = isFile(cudaDriverFile)
local singularityCudaExists = isFile("/.singularity.d/libs/libcuda.so")
if not (cudaDriverExists or singularityCudaExists) then
local advice = "which relies on the CUDA runtime environment and driver libraries. "
advice = advice .. "In order to be able to use the module, you will need "
advice = advice .. "to make sure EESSI can find the GPU driver libraries on your host system.\\n"
advice = advice .. refer_to_docs
LmodError("\\nYou requested to load ", simpleName, " ", advice)
else
-- CUDA driver exists, now we check its version to see if an update is needed
if cudaDriverExists then
local cudaVersion = read_file(cudaVersionFile)
local cudaVersion_req = os.getenv("EESSICUDAVERSION")
-- driver CUDA versions don't give a patch version for CUDA
local major, minor = string.match(cudaVersion, "(%d+)%.(%d+)")
local major_req, minor_req, patch_req = string.match(cudaVersion_req, "(%d+)%.(%d+)%.(%d+)")
local driver_libs_need_update = false
if major < major_req then
driver_libs_need_update = true
elseif major == major_req then
if minor < minor_req then
driver_libs_need_update = true
end
end
if driver_libs_need_update == true then
local advice = "but the module you want to load requires CUDA " .. cudaVersion_req .. ". "
advice = advice .. "Please update your CUDA driver libraries and then "
advice = advice .. "let EESSI know about the update.\\n"
advice = advice .. refer_to_docs
LmodError("\\nYour driver CUDA version is ", cudaVersion, " ", advice)
end
end
end
end
end
local function eessi_openmpi_load_hook(t)
-- disable smcuda BTL when loading OpenMPI module for aarch64/neoverse_v1,
-- to work around hang/crash due to bug in OpenMPI;
-- see https://gitlab.com/eessi/support/-/issues/41
local frameStk = require("FrameStk"):singleton()
local mt = frameStk:mt()
local moduleName = string.match(t.modFullName, "(.-)/")
local cpuTarget = os.getenv("EESSI_SOFTWARE_SUBDIR") or ""
if (moduleName == "OpenMPI") and (cpuTarget == "aarch64/neoverse_v1") then
local msg = "Adding '^smcuda' to $OMPI_MCA_btl to work around bug in OpenMPI"
LmodMessage(msg .. " (see https://gitlab.com/eessi/support/-/issues/41)")
local ompiMcaBtl = os.getenv("OMPI_MCA_btl")
if ompiMcaBtl == nil then
setenv("OMPI_MCA_btl", "^smcuda")
else
setenv("OMPI_MCA_btl", ompiMcaBtl .. ",^smcuda")
end
end
end
-- Combine both functions into a single one, as we can only register one function as load hook in lmod
-- Also: make it non-local, so it can be imported and extended by other lmodrc files if needed
function eessi_load_hook(t)
-- Only apply CUDA hooks if the loaded module is in the EESSI prefix
-- This avoids getting an Lmod Error when trying to load a CUDA module from a local software stack
if from_eesi_prefix(t) then
eessi_cuda_enabled_load_hook(t)
end
eessi_openmpi_load_hook(t)
end
hook.register("load", eessi_load_hook)
"""
def error(msg):
sys.stderr.write("ERROR: %s\n" % msg)
sys.exit(1)
if len(sys.argv) != 2:
error("Usage: %s <software prefix>" % sys.argv[0])
prefix = sys.argv[1]
if not os.path.exists(prefix):
error("Prefix directory %s does not exist!" % prefix)
sitepackage_path = os.path.join(prefix, DOT_LMOD, 'SitePackage.lua')
try:
os.makedirs(os.path.dirname(sitepackage_path), exist_ok=True)
with open(sitepackage_path, 'w') as fp:
fp.write(hook_txt)
except (IOError, OSError) as err:
error("Failed to create %s: %s" % (sitepackage_path, err))
print(sitepackage_path)