Skip to content

Commit 238a09f

Browse files
ChrisRackauckas-Claude, ChrisRackauckas, claude
authored
Add GPU type detection to system information (#713)
This commit enhances LinearSolveAutotune's system information collection to include detailed GPU information when CUDA or Metal GPUs are available.

Changes:
- Added `get_cuda_gpu_info()` function to retrieve CUDA GPU details:
  - GPU name/type via CUDA.name()
  - Number of GPUs
  - GPU memory in GB via CUDA.totalmem()
  - CUDA compute capability via CUDA.capability()
  - All GPU types for multi-GPU systems
- Added `get_metal_gpu_info()` function to detect Metal GPUs:
  - Infers GPU type from CPU model (M1/M2/M3/M4)
  - Reports GPU count
- Updated `get_system_info()` to include GPU information fields
- Updated `get_detailed_system_info()` to include GPU fields
- Enhanced telemetry markdown formatting to display GPU details

The implementation gracefully handles missing GPU hardware or packages, returning empty information when GPUs are not available.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-authored-by: ChrisRackauckas <[email protected]>
Co-authored-by: Claude <[email protected]>
1 parent ac13bfd commit 238a09f

File tree

2 files changed

+177
-0
lines changed

2 files changed

+177
-0
lines changed

lib/LinearSolveAutotune/src/gpu_detection.jl

Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,126 @@ function is_metal_available()
7070
end
7171
end
7272

73+
"""
74+
get_cuda_gpu_info()
75+
76+
Get information about CUDA GPU devices if available.
77+
Returns a Dict with GPU type, count, memory, and compute capability.
78+
"""
79+
function get_cuda_gpu_info()
    # Contract: the returned Dict is either empty or complete. When populated it
    # always holds "gpu_count", "gpu_type", "gpu_memory_gb" and "gpu_capability"
    # (plus "gpu_types" when more than one device is present). Callers only test
    # `isempty` before indexing those keys, so a half-filled Dict must never
    # escape from this function.
    gpu_info = Dict{String, Any}()

    # The CUDA module is only reachable through LinearSolve's CUDA extension;
    # if the extension is not loaded there is nothing to query.
    ext = Base.get_extension(LinearSolve, :LinearSolveCUDAExt)
    if ext === nothing
        return gpu_info
    end

    try
        cuda = ext.CUDA

        # CUDA may be loaded without a usable driver/device.
        if !cuda.functional()
            return gpu_info
        end

        devices = collect(cuda.devices())
        num_devices = length(devices)

        if num_devices > 0
            # The headline fields describe the first device only.
            first_device = devices[1]
            capability = cuda.capability(first_device)

            # Gather everything in a scratch Dict so that an exception raised by
            # any single query leaves `gpu_info` empty instead of partially set.
            collected = Dict{String, Any}(
                "gpu_count" => num_devices,
                "gpu_type" => cuda.name(first_device),
                # Convert memory from bytes to GB (1 GB = 1024^3 bytes here).
                "gpu_memory_gb" => round(
                    cuda.totalmem(first_device) / (1024^3), digits=2
                ),
                "gpu_capability" => "$(capability.major).$(capability.minor)",
            )

            # On multi-GPU systems also report the distinct device names.
            if num_devices > 1
                collected["gpu_types"] = unique(
                    String[cuda.name(dev) for dev in devices]
                )
            end

            # Publish only after every query has succeeded.
            merge!(gpu_info, collected)
        end
    catch e
        # Best effort: any failure is logged at debug level and the (still
        # empty) Dict is returned.
        @debug "Error getting CUDA GPU info: $e"
    end

    return gpu_info
end
132+
133+
"""
134+
get_metal_gpu_info()
135+
136+
Get information about Metal GPU devices if available.
137+
Returns a Dict with GPU type and count.
138+
"""
139+
function get_metal_gpu_info()
    # Contract: the returned Dict is either empty or holds both "gpu_type" and
    # "gpu_count". Callers only test `isempty` before indexing both keys, so the
    # keys are written together, after all work that can throw has finished.
    gpu_info = Dict{String, Any}()

    # The Metal module is only reachable through LinearSolve's Metal extension;
    # if the extension is not loaded there is nothing to query.
    ext = Base.get_extension(LinearSolve, :LinearSolveMetalExt)
    if ext === nothing
        return gpu_info
    end

    try
        metal = ext.Metal

        # Metal may be loaded on a machine where it cannot actually be used.
        if !metal.functional()
            return gpu_info
        end

        # Generic label for non-aarch64 hosts.
        gpu_type = "Metal GPU"

        if Sys.ARCH == :aarch64
            # No GPU name is queried from Metal here; the type is inferred from
            # the CPU model string reported for the first core.
            cpu_info = Sys.cpu_info()
            cpu_model = isempty(cpu_info) ? "" : lowercase(cpu_info[1].model)

            # Fallback when the model string names no known generation.
            gpu_type = "Apple Silicon GPU"
            # Checked in this order; the first substring match wins.
            for generation in ("M1", "M2", "M3", "M4")
                if contains(cpu_model, lowercase(generation))
                    gpu_type = "Apple $(generation) GPU"
                    break
                end
            end
        end

        # The count is hard-coded: devices are not enumerated here (the original
        # note: Metal typically has one device on Apple Silicon).
        gpu_info["gpu_type"] = gpu_type
        gpu_info["gpu_count"] = 1
    catch e
        # Best effort: any failure is logged at debug level and the (still
        # empty) Dict is returned.
        @debug "Error getting Metal GPU info: $e"
    end

    return gpu_info
end
192+
73193
"""
74194
get_system_info()
75195
@@ -126,6 +246,26 @@ function get_system_info()
126246
info["blas_vendor"] = string(LinearAlgebra.BLAS.vendor())
127247
info["has_cuda"] = is_cuda_available()
128248
info["has_metal"] = is_metal_available()
249+
250+
# Get GPU information if CUDA is available
251+
if info["has_cuda"]
252+
gpu_info = get_cuda_gpu_info()
253+
if !isempty(gpu_info)
254+
info["gpu_type"] = gpu_info["gpu_type"]
255+
info["gpu_count"] = gpu_info["gpu_count"]
256+
info["gpu_memory_gb"] = gpu_info["gpu_memory_gb"]
257+
info["gpu_capability"] = gpu_info["gpu_capability"]
258+
end
259+
end
260+
261+
# Get GPU information if Metal is available
262+
if info["has_metal"]
263+
metal_info = get_metal_gpu_info()
264+
if !isempty(metal_info)
265+
info["gpu_type"] = metal_info["gpu_type"]
266+
info["gpu_count"] = metal_info["gpu_count"]
267+
end
268+
end
129269

130270
if MKL_jll.is_available()
131271
info["mkl_available"] = true
@@ -462,6 +602,26 @@ function get_detailed_system_info()
462602
system_data["metal_available"] = false
463603
end
464604

605+
# Get detailed GPU information if available
606+
if system_data["cuda_available"]
607+
gpu_info = get_cuda_gpu_info()
608+
if !isempty(gpu_info)
609+
system_data["gpu_type"] = gpu_info["gpu_type"]
610+
system_data["gpu_count"] = gpu_info["gpu_count"]
611+
system_data["gpu_memory_gb"] = gpu_info["gpu_memory_gb"]
612+
system_data["gpu_capability"] = gpu_info["gpu_capability"]
613+
if haskey(gpu_info, "gpu_types")
614+
system_data["gpu_types"] = join(gpu_info["gpu_types"], ", ")
615+
end
616+
end
617+
elseif system_data["metal_available"]
618+
metal_info = get_metal_gpu_info()
619+
if !isempty(metal_info)
620+
system_data["gpu_type"] = metal_info["gpu_type"]
621+
system_data["gpu_count"] = metal_info["gpu_count"]
622+
end
623+
end
624+
465625
# Try to detect if CUDA/Metal packages are actually loaded
466626
system_data["cuda_loaded"] = false
467627
system_data["metal_loaded"] = false

lib/LinearSolveAutotune/src/telemetry.jl

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -236,6 +236,23 @@ function format_system_info_markdown(system_info::Dict)
236236
# Handle both "has_metal" and "metal_available" keys
237237
push!(lines, "- **Metal Available**: $(get(system_info, "metal_available", get(system_info, "has_metal", false)))")
238238

239+
# GPU Information
240+
if haskey(system_info, "gpu_type")
241+
push!(lines, "- **GPU Type**: $(system_info["gpu_type"])")
242+
if haskey(system_info, "gpu_count")
243+
push!(lines, "- **GPU Count**: $(system_info["gpu_count"])")
244+
end
245+
if haskey(system_info, "gpu_memory_gb")
246+
push!(lines, "- **GPU Memory**: $(system_info["gpu_memory_gb"]) GB")
247+
end
248+
if haskey(system_info, "gpu_capability")
249+
push!(lines, "- **CUDA Capability**: $(system_info["gpu_capability"])")
250+
end
251+
if haskey(system_info, "gpu_types")
252+
push!(lines, "- **All GPU Types**: $(join(system_info["gpu_types"], ", "))")
253+
end
254+
end
255+
239256
# Add package versions section
240257
if haskey(system_info, "package_versions")
241258
push!(lines, "")

0 commit comments

Comments
 (0)