@@ -109,5 +109,125 @@ else
109109 logger " /opt/sagemaker not mounted. Skipping containerd configuration"
110110fi
111111
112+ # ===== EFA FSx LUSTRE CLIENT SETUP =====
113+
# Configure the EFA-enabled FSx for Lustre client when the host supports it.
#
# Steps:
#   1. Check that the OS (and, on Ubuntu, the kernel) is on the supported list.
#   2. Check that the EFA tools exist and report an EFA provider.
#   3. Download the AWS-provided setup package into a private temporary
#      directory, run its setup.sh, and remove the directory afterwards.
#
# Returns:
#   0 - the client was configured, OR setup was intentionally skipped
#       (unsupported OS/kernel, EFA tools missing, EFA not available).
#   1 - a download, extract, or setup step failed.
#
# The caller's working directory is left untouched: all work under /tmp
# happens inside a subshell.
setup_efa_fsx_client() {
    logger "[INFO] Starting EFA FSx client setup"

    # Step 1: OS compatibility check
    source /etc/os-release 2>/dev/null || { logger "[INFO] Cannot detect OS, skipping"; return 0; }

    # Default to empty so a missing field falls through to "not supported"
    # instead of tripping `set -u` in a caller that enables it.
    local os_id="${ID:-}"
    local os_version="${VERSION_ID:-}"

    case "${os_id}-${os_version}" in
        "amzn-2023")
            logger "[INFO] Amazon Linux 2023 - supported" ;;
        # 9.5-9.9, two-digit minors such as 9.10, and RHEL 10+.
        "rhel-9."[5-9]* | "rhel-9."[1-9][0-9]* | "rhel-1"[0-9]*)
            logger "[INFO] RHEL $os_version - supported" ;;
        "ubuntu-22.04" | "ubuntu-2"[3-9]*)
            # Ubuntu additionally needs kernel 6.8 or newer.
            local kernel_release kernel_major kernel_minor
            kernel_release=$(uname -r)
            if [[ "$kernel_release" =~ ^([0-9]+)\.([0-9]+) ]]; then
                kernel_major="${BASH_REMATCH[1]}"
                kernel_minor="${BASH_REMATCH[2]}"
            else
                logger "[INFO] Cannot parse kernel version '${kernel_release}', skipping"
                return 0
            fi
            if [[ "$kernel_major" -gt 6 ]] || [[ "$kernel_major" -eq 6 && "$kernel_minor" -ge 8 ]]; then
                logger "[INFO] Ubuntu $os_version kernel ${kernel_major}.${kernel_minor} - supported"
            else
                logger "[INFO] Ubuntu needs kernel 6.8+, found ${kernel_major}.${kernel_minor}, skipping"
                return 0
            fi ;;
        *)
            logger "[INFO] OS $os_id $os_version not supported, skipping"
            return 0 ;;
    esac

    # Step 2: EFA availability check
    if [[ ! -x "/opt/amazon/efa/bin/fi_info" ]]; then
        logger "[INFO] EFA tools not found, skipping"
        return 0
    fi

    if ! /opt/amazon/efa/bin/fi_info -p efa >/dev/null 2>&1; then
        logger "[INFO] EFA not available on this instance, skipping"
        return 0
    fi

    logger "[INFO] EFA detected - configuring for FSx Lustre"

    # Step 3: Download and setup
    # NOTE(review): the package is downloaded and executed without an
    # integrity check; consider pinning a checksum if one is published.
    local package_url="https://docs.aws.amazon.com/fsx/latest/LustreGuide/samples/configure-efa-fsx-lustre-client.zip"

    # A private, unpredictable directory avoids clashes with stale files from
    # an earlier run and with anything else writing to /tmp.
    local workdir
    workdir=$(mktemp -d /tmp/efa-fsx-setup.XXXXXX) \
        || { logger "[ERROR] Cannot create working directory under /tmp"; return 1; }

    local status=0
    (
        # Subshell: the cd below must not leak into the calling script.
        cd "$workdir" || { logger "[ERROR] Cannot access $workdir"; exit 1; }

        logger "[INFO] Downloading EFA FSx client setup..."
        if ! curl --fail --silent --show-error --max-time 30 -o efa-setup.zip "$package_url"; then
            logger "[ERROR] Download failed"
            exit 1
        fi

        logger "[INFO] Extracting setup files..."
        if ! unzip -q efa-setup.zip; then
            logger "[ERROR] Extract failed"
            exit 1
        fi

        if [[ ! -f "configure-efa-fsx-lustre-client/setup.sh" ]]; then
            logger "[ERROR] Setup script not found in package"
            exit 1
        fi

        chmod +x configure-efa-fsx-lustre-client/setup.sh

        logger "[INFO] Running EFA FSx client setup..."
        if ! ./configure-efa-fsx-lustre-client/setup.sh; then
            logger "[ERROR] EFA FSx client setup failed"
            exit 1
        fi

        logger "[SUCCESS] EFA FSx client configured successfully"
    ) || status=1

    # Cleanup: one place, runs on success and on every failure path.
    rm -rf -- "$workdir"
    return "$status"
}
191+
192+ # Load Lustre modules
# Load the Lustre kernel modules and bring up the LNet network.
#
# Best effort: every failure is logged and execution continues, so this
# function never aborts the calling script.
load_lustre_modules() {
    logger "[INFO] Loading Lustre kernel modules"

    local module output

    # lnet must be loaded before lustre, so the order matters.
    for module in lnet lustre; do
        # modprobe exits 0 when the module is already loaded, so a non-zero
        # status is a real failure; log modprobe's own message for diagnosis.
        if output=$(modprobe "$module" 2>&1); then
            logger "[INFO] $module module loaded"
        else
            logger "[WARN] $module module load failed: ${output:-no error output}"
        fi
    done

    # Initialize LNet network
    if command -v lctl >/dev/null 2>&1; then
        if output=$(lctl network up 2>&1); then
            logger "[INFO] LNet network initialized"
        else
            # Kept at INFO: a non-zero status here is treated as benign
            # (e.g. network already up), but the tool output is now recorded.
            logger "[INFO] LNet network already active or initialization attempted${output:+: $output}"
        fi
    else
        logger "[INFO] lctl not found, skipping LNet network initialization"
    fi
}
219+
# Execute EFA FSx client setup.
# setup_efa_fsx_client returns 0 both when it configured the client and when
# it deliberately skipped (unsupported OS, no EFA); it returns non-zero only
# when a download/extract/setup step failed. The messages below reflect that.
if setup_efa_fsx_client; then
    logger "[INFO] EFA FSx client setup finished (configured or skipped - see preceding messages)"
else
    logger "[WARN] EFA FSx client setup failed - continuing with standard Lustre"
fi

# Load Lustre modules (always execute, regardless of the EFA outcome)
load_lustre_modules

logger "[INFO] FSx client setup complete"
231+
112232logger " no more steps to run"
113233logger " [stop] on_create.sh"
0 commit comments