Skip to content

Commit 3782eeb

Browse files
authored
Merge pull request #323 from open-edge-platform/update-branch
fix: update gpu and main script for error inconsistencies (#856)
2 parents d302d53 + a2061ea commit 3782eeb

File tree

2 files changed

+141
-69
lines changed

2 files changed

+141
-69
lines changed

gpu_installer.sh

Lines changed: 114 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@ readonly S_VALID="[✓]"
1515
readonly S_INFO="[INFO]"
1616

1717
# Global variables
18-
HAS_DGPU=0
1918

2019
# Package arrays - exported for external use
2120
export COMPUTE_PACKAGES=(
@@ -83,6 +82,10 @@ log_success() {
8382
echo "$S_VALID $1"
8483
}
8584

85+
# Global status flags
86+
DRIVER_INSTALL_OK=0
87+
DRIVER_VERIFY_OK=0
88+
8689
# System verification functions
8790
check_privileges() {
8891
if [ "$EUID" -ne 0 ]; then
@@ -136,25 +139,28 @@ verify_kernel() {
136139
# GPU detection functions
137140
detect_gpu() {
138141
echo -e "\n# Detecting GPU devices"
139-
142+
140143
if ! command -v lspci >/dev/null 2>&1; then
141144
error_exit "lspci command not found. Install pciutils: apt-get install pciutils"
142145
fi
143-
144-
local lspci_output
145-
lspci_output=$(lspci -nn | grep -Ei 'VGA|DISPLAY')
146-
147-
if [ -n "$lspci_output" ]; then
148-
log_info "Detected GPU device(s):"
149-
echo "$lspci_output"
150-
log_success "GPU detected - proceeding with Intel GPU driver installation"
151-
152-
# Set global flag for any GPU found
153-
HAS_DGPU=1
146+
147+
local gpu_lines
148+
gpu_lines=$(lspci -nn | grep -Ei 'VGA|DISPLAY' || true)
149+
150+
if [ -z "$gpu_lines" ]; then
151+
error_exit "No GPU (VGA/DISPLAY) devices found. This installer targets Intel GPUs."
152+
fi
153+
154+
log_info "Detected GPU device(s):"
155+
echo "$gpu_lines"
156+
157+
# If any Intel vendor (8086) present, proceed; else exit.
158+
if echo "$gpu_lines" | grep -Fq "[8086:"; then
159+
log_success "Intel GPU vendor (8086) detected; proceeding with installation"
154160
return 0
155-
else
156-
error_exit "No GPU found. This script installs Intel GPU drivers"
157161
fi
162+
163+
error_exit "Non-Intel GPU(s) vendor detected (no PCI vendor 8086 present). Unsupported devices:$(printf "\n%s" "$gpu_lines")"
158164
}
159165

160166
# Repository and package management
@@ -280,10 +286,10 @@ install_gpu_drivers() {
280286
# Post-installation configuration (previously post_installation_fixes)
281287
echo -e "\n# Post-installation configuration"
282288

283-
# Verify critical packages
289+
# Verify critical packages strictly
284290
local critical_packages=("intel-opencl-icd" "libze-intel-gpu1")
285291
local missing_packages=()
286-
292+
local install_errors=0
287293
for pkg in "${critical_packages[@]}"; do
288294
if dpkg -l "$pkg" 2>/dev/null | grep -q "^ii"; then
289295
log_success "$pkg is installed"
@@ -292,55 +298,100 @@ install_gpu_drivers() {
292298
missing_packages+=("$pkg")
293299
fi
294300
done
295-
296301
if [ ${#missing_packages[@]} -gt 0 ]; then
297-
log_info "Reinstalling missing critical packages"
298-
install_packages "${missing_packages[@]}"
302+
log_info "Attempting reinstall of missing critical packages: ${missing_packages[*]}"
303+
if ! install_packages "${missing_packages[@]}"; then
304+
echo "$S_ERROR Reinstall attempt failed for: ${missing_packages[*]}"
305+
install_errors=1
306+
fi
307+
# Re-check after reinstall
308+
for pkg in "${missing_packages[@]}"; do
309+
if ! dpkg -l "$pkg" 2>/dev/null | grep -q "^ii"; then
310+
echo "$S_ERROR Critical package still missing: $pkg"
311+
install_errors=1
312+
fi
313+
done
299314
fi
300-
301-
# Fix DRI device permissions
315+
316+
# DRI device check (required for runtime)
302317
if [ -e "/dev/dri" ]; then
303318
log_info "Checking DRI devices: $(find /dev/dri/ -maxdepth 1 -type f -printf '%f ' 2>/dev/null)"
304-
305-
if ls /dev/dri/render* >/dev/null 2>&1; then
306-
chmod 666 /dev/dri/render*
307-
log_success "Updated render device permissions"
319+
if ! ls /dev/dri/render* >/dev/null 2>&1; then
320+
echo "$S_ERROR No /dev/dri/render* devices found"
321+
install_errors=1
322+
else
323+
chmod 666 /dev/dri/render* || echo "$S_ERROR Failed to adjust permissions on render devices"
324+
log_success "Render devices present"
308325
fi
309326
else
310327
echo "$S_ERROR /dev/dri directory not found"
328+
install_errors=1
329+
fi
330+
331+
if [ $install_errors -eq 0 ]; then
332+
log_success "Intel GPU driver installation prerequisites validated"
333+
DRIVER_INSTALL_OK=1
334+
else
335+
echo "$S_ERROR Driver installation encountered errors"
336+
DRIVER_INSTALL_OK=0
311337
fi
312-
313-
log_success "Intel GPU driver installation and configuration completed"
314338
}
315339

316340
# Verify driver installation
317341
verify_drivers() {
318342
echo -e "\n# Verifying driver installation"
319-
320-
# OpenCL verification
321-
if command -v clinfo >/dev/null 2>&1; then
322-
if clinfo >/dev/null 2>&1; then
323-
log_success "OpenCL runtime working"
324-
325-
local device_names
326-
device_names=$(clinfo 2>/dev/null | grep "Device Name" | grep -i intel)
327-
328-
if [ -n "$device_names" ]; then
329-
log_success "Intel GPU devices detected by OpenCL:"
330-
echo "$device_names"
331-
332-
if [ "$HAS_DGPU" -eq 1 ] && echo "$device_names" | grep -qi "arc\|bmg\|battlemage\|dg2\|alchemist"; then
333-
log_success "Intel Arc discrete GPU working with OpenCL"
334-
fi
335-
else
336-
echo "$S_ERROR No Intel GPU devices found in OpenCL"
337-
fi
338-
else
339-
echo "$S_ERROR clinfo failed to run"
340-
fi
343+
344+
# Step 1: clinfo presence
345+
if ! command -v clinfo >/dev/null 2>&1; then
346+
echo "$S_ERROR clinfo not found (OpenCL runtime not installed)"
347+
DRIVER_VERIFY_OK=0
348+
return 1
349+
fi
350+
351+
# Step 2: clinfo -l device enumeration
352+
local clinfo_list
353+
if ! clinfo_list=$(clinfo -l 2>/dev/null); then
354+
echo "$S_ERROR clinfo -l failed to execute"
355+
DRIVER_VERIFY_OK=0
356+
return 1
357+
fi
358+
359+
# Guard against empty or whitespace-only output
360+
if [ -z "$(printf "%s" "$clinfo_list" | sed -n '/\S/p')" ]; then
361+
echo "$S_ERROR clinfo -l returned no device/platform information"
362+
DRIVER_VERIFY_OK=0
363+
return 1
364+
fi
365+
366+
# Determine device count robustly (avoid duplicate '0 0' from grep fallback)
367+
local device_count
368+
device_count=$(printf "%s" "$clinfo_list" | awk '/Device[[:space:]]*#/{c++} END{print c+0}')
369+
370+
if ! [[ "$device_count" =~ ^[0-9]+$ ]]; then
371+
echo "$S_ERROR OpenCL device count not an integer: '$device_count'"
372+
DRIVER_VERIFY_OK=0
373+
return 1
374+
fi
375+
if [ "$device_count" -eq 0 ]; then
376+
echo "$S_ERROR No OpenCL runtime devices reported (clinfo -l empty)"
377+
DRIVER_VERIFY_OK=0
378+
return 1
379+
fi
380+
381+
log_success "OpenCL runtime working (devices: $device_count)"
382+
383+
# Optional: show Intel devices (non-fatal if none)
384+
local intel_devices
385+
intel_devices=$(printf "%s" "$clinfo_list" | grep -iE '(^|[[:space:]])Intel(|\(R\))' || true)
386+
if [ -n "$intel_devices" ]; then
387+
log_success "Intel OpenCL device entries detected"
388+
echo "$intel_devices"
341389
else
342-
echo "$S_ERROR clinfo not found"
390+
log_info "No explicit Intel entries found in clinfo -l (non-fatal)"
343391
fi
392+
393+
DRIVER_VERIFY_OK=1
394+
return 0
344395
}
345396

346397

@@ -385,9 +436,18 @@ main() {
385436
# Apply temporary fix for Arc B60 on Series 2 CPUs
386437
apply_arc_b60_fix
387438

388-
install_gpu_drivers
389-
verify_drivers
390-
echo -e "\n# $S_VALID GPU installation completed. Please reboot your system."
439+
install_gpu_drivers || echo "$S_ERROR install_gpu_drivers reported failure"
440+
if ! verify_drivers; then
441+
# Avoid literal \n in output; use printf for portability
442+
printf "\n%s GPU installation verification failed. See errors above.\n" "$S_ERROR"
443+
exit 1
444+
fi
445+
if [ $DRIVER_INSTALL_OK -eq 1 ] && [ $DRIVER_VERIFY_OK -eq 1 ]; then
446+
echo -e "\n# $S_VALID GPU installation completed successfully. Please reboot your system."
447+
else
448+
echo -e "\n# $S_ERROR GPU installation incomplete (install_ok=$DRIVER_INSTALL_OK verify_ok=$DRIVER_VERIFY_OK)."
449+
exit 1
450+
fi
391451
}
392452

393453
# Execute main function

main_installer.sh

Lines changed: 27 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -217,17 +217,27 @@ check_intel_arc_gpu() {
217217
# Find any VGA/DISPLAY devices
218218
local lspci_output
219219
lspci_output=$(lspci -nn | grep -Ei 'VGA|DISPLAY')
220-
221-
if [ -n "$lspci_output" ]; then
222-
echo "GPU devices detected:"
223-
echo "$lspci_output"
224-
echo "$S_VALID GPU found - proceeding with Intel GPU driver installation"
225-
return 0
226-
else
220+
221+
if [ -z "$lspci_output" ]; then
227222
echo "$S_WARNING No GPU devices detected"
228223
echo "GPU driver installation will be skipped"
229224
return 1
230225
fi
226+
227+
echo "GPU devices detected:"
228+
echo "$lspci_output"
229+
230+
# Enforce Intel vendor (8086) for GPU install; exit if non-Intel only
231+
if echo "$lspci_output" | grep -Fq "[8086:"; then
232+
echo "$S_VALID Intel (8086) GPU found - proceeding with Intel GPU driver installation"
233+
return 0
234+
else
235+
echo "$S_ERROR Non-Intel GPU(s) detected (no PCI vendor 8086 present)"
236+
echo "Detected devices:"
237+
echo "$lspci_output"
238+
echo "$S_ERROR Exiting: this installer supports Intel GPUs only"
239+
exit 1
240+
fi
231241
}
232242

233243
# Install GPU drivers - only if any GPU is present
@@ -240,13 +250,15 @@ install_gpu_drivers() {
240250
# shellcheck disable=SC1091
241251
if bash "$SCRIPT_DIR/gpu_installer.sh"; then
242252
echo "$S_VALID GPU drivers installed successfully"
243-
244-
# Verify OpenCL setup after installation
245-
# verify_opencl_setup
253+
# Optional: post verification hook could go here
246254
return 0
247255
else
256+
# gpu_installer.sh already emitted detailed errors including
257+
# OpenCL verification failures. Provide consolidated high-level status and exit.
248258
echo "$S_ERROR GPU driver installation failed"
249-
return 1
259+
echo "$S_WARNING GPU driver installation had issues"
260+
echo "$S_ERROR Exiting due to GPU installation failure"
261+
exit 1
250262
fi
251263
else
252264
echo "$S_WARNING Skipping GPU driver installation - no GPU devices detected"
@@ -507,8 +519,8 @@ main() {
507519
echo "NPU Support: Available and will be installed"
508520
echo ""
509521

510-
# Install GPU drivers (will check for GPU presence)
511-
install_gpu_drivers || echo "$S_WARNING GPU driver installation had issues"
522+
# Install GPU drivers (will check for GPU presence). Any failure will exit.
523+
install_gpu_drivers
512524

513525
# Install NPU drivers (Core Ultra only)
514526
install_npu_drivers || echo "$S_WARNING NPU driver installation had issues"
@@ -529,8 +541,8 @@ main() {
529541
echo "NPU Support: Not available (Core Ultra only)"
530542
echo ""
531543

532-
# Install GPU drivers (will check for GPU presence)
533-
install_gpu_drivers || echo "$S_WARNING GPU driver installation had issues"
544+
# Install GPU drivers (will check for GPU presence). Any failure will exit.
545+
install_gpu_drivers
534546

535547
# Install OpenVINO with error handling
536548
if ! install_openvino; then

0 commit comments

Comments
 (0)