Skip to content

Commit 33ec8d3

Browse files
Add GPU type detection to system information
This commit enhances LinearSolveAutotune's system information collection to include detailed GPU information when CUDA or Metal GPUs are available.

Changes:
- Added `get_cuda_gpu_info()` function to retrieve CUDA GPU details:
  - GPU name/type via CUDA.name()
  - Number of GPUs
  - GPU memory in GB via CUDA.totalmem()
  - CUDA compute capability via CUDA.capability()
  - All GPU types for multi-GPU systems
- Added `get_metal_gpu_info()` function to detect Metal GPUs:
  - Infers GPU type from CPU model (M1/M2/M3/M4)
  - Reports GPU count
- Updated `get_system_info()` to include GPU information fields
- Updated `get_detailed_system_info()` to include GPU fields
- Enhanced telemetry markdown formatting to display GPU details

The implementation gracefully handles missing GPU hardware or packages, returning empty information when GPUs are not available.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <[email protected]>
1 parent ac13bfd commit 33ec8d3

File tree

2 files changed

+177
-0
lines changed

2 files changed

+177
-0
lines changed

lib/LinearSolveAutotune/src/gpu_detection.jl

Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,126 @@ function is_metal_available()
7070
end
7171
end
7272

73+
"""
74+
get_cuda_gpu_info()
75+
76+
Get information about CUDA GPU devices if available.
77+
Returns a Dict with GPU type, count, memory, and compute capability.
78+
"""
79+
function get_cuda_gpu_info()
    # Collects CUDA device details into a Dict{String, Any}.
    #
    # Returned keys (all-or-nothing):
    #   "gpu_count"      - number of CUDA devices
    #   "gpu_type"       - name of the first device
    #   "gpu_memory_gb"  - total memory of the first device in GiB, rounded to 2 digits
    #   "gpu_capability" - compute capability of the first device as "major.minor"
    #   "gpu_types"      - unique device names; only present when there is more than
    #                      one device
    #
    # An empty Dict is returned when the CUDA extension is not loaded, CUDA is not
    # functional, no devices are reported, or any query throws.
    gpu_info = Dict{String, Any}()

    # The CUDA module is only reachable through the LinearSolve package extension.
    ext = Base.get_extension(LinearSolve, :LinearSolveCUDAExt)
    if ext === nothing
        return gpu_info
    end

    try
        CUDA = ext.CUDA

        if !CUDA.functional()
            return gpu_info
        end

        devices = collect(CUDA.devices())
        num_devices = length(devices)

        if num_devices > 0
            # Per-device details are reported for the first device only.
            first_device = first(devices)

            gpu_info["gpu_count"] = num_devices
            gpu_info["gpu_type"] = CUDA.name(first_device)

            # Convert memory from bytes to GB
            total_mem_bytes = CUDA.totalmem(first_device)
            gpu_info["gpu_memory_gb"] = round(total_mem_bytes / (1024^3), digits=2)

            capability = CUDA.capability(first_device)
            gpu_info["gpu_capability"] = "$(capability.major).$(capability.minor)"

            # If multiple GPUs, list all distinct device names
            if num_devices > 1
                gpu_info["gpu_types"] = unique(String[CUDA.name(dev) for dev in devices])
            end
        end
    catch e
        # Drop anything written before the failure: callers that see a non-empty
        # Dict read "gpu_type", "gpu_count", "gpu_memory_gb" and "gpu_capability"
        # directly, so a partially filled Dict would surface as a KeyError there.
        empty!(gpu_info)
        @debug "Error getting CUDA GPU info: $e"
    end

    return gpu_info
end
132+
133+
"""
134+
get_metal_gpu_info()
135+
136+
Get information about Metal GPU devices if available.
137+
Returns a Dict with GPU type and count.
138+
"""
139+
function get_metal_gpu_info()
    # Collects Metal GPU details into a Dict{String, Any}.
    #
    # Returned keys (all-or-nothing):
    #   "gpu_count" - always 1 (a single Metal device is assumed)
    #   "gpu_type"  - on aarch64, "Apple M<n> GPU" when the first CPU's model string
    #                 contains m1..m4 (case-insensitive), otherwise
    #                 "Apple Silicon GPU"; on any other architecture "Metal GPU"
    #
    # An empty Dict is returned when the Metal extension is not loaded, Metal is not
    # functional, or any query throws.
    gpu_info = Dict{String, Any}()

    # The Metal module is only reachable through the LinearSolve package extension.
    ext = Base.get_extension(LinearSolve, :LinearSolveMetalExt)
    if ext === nothing
        return gpu_info
    end

    try
        Metal = ext.Metal

        if !Metal.functional()
            return gpu_info
        end

        # Metal typically has one device on Apple Silicon
        gpu_info["gpu_count"] = 1

        if Sys.ARCH == :aarch64
            # The GPU generation is inferred from the CPU model string; the Metal
            # device itself is not queried.
            cpu_info = Sys.cpu_info()
            cpu_model = isempty(cpu_info) ? "" : lowercase(cpu_info[1].model)

            # Checked in order, so M1 wins over M2, and so on, should a model
            # string ever match more than one generation.
            gpu_type = "Apple Silicon GPU"
            for generation in ("M1", "M2", "M3", "M4")
                if contains(cpu_model, lowercase(generation))
                    gpu_type = "Apple $(generation) GPU"
                    break
                end
            end
            gpu_info["gpu_type"] = gpu_type
        else
            gpu_info["gpu_type"] = "Metal GPU"
        end
    catch e
        # Drop anything written before the failure: callers that see a non-empty
        # Dict read "gpu_type" and "gpu_count" directly, so a Dict holding only
        # "gpu_count" would surface as a KeyError there.
        empty!(gpu_info)
        @debug "Error getting Metal GPU info: $e"
    end

    return gpu_info
end
192+
73193
"""
74194
get_system_info()
75195
@@ -126,6 +246,26 @@ function get_system_info()
126246
info["blas_vendor"] = string(LinearAlgebra.BLAS.vendor())
127247
info["has_cuda"] = is_cuda_available()
128248
info["has_metal"] = is_metal_available()
249+
250+
# Get GPU information if CUDA is available
251+
if info["has_cuda"]
252+
gpu_info = get_cuda_gpu_info()
253+
if !isempty(gpu_info)
254+
info["gpu_type"] = gpu_info["gpu_type"]
255+
info["gpu_count"] = gpu_info["gpu_count"]
256+
info["gpu_memory_gb"] = gpu_info["gpu_memory_gb"]
257+
info["gpu_capability"] = gpu_info["gpu_capability"]
258+
end
259+
end
260+
261+
# Get GPU information if Metal is available
262+
if info["has_metal"]
263+
metal_info = get_metal_gpu_info()
264+
if !isempty(metal_info)
265+
info["gpu_type"] = metal_info["gpu_type"]
266+
info["gpu_count"] = metal_info["gpu_count"]
267+
end
268+
end
129269

130270
if MKL_jll.is_available()
131271
info["mkl_available"] = true
@@ -462,6 +602,26 @@ function get_detailed_system_info()
462602
system_data["metal_available"] = false
463603
end
464604

605+
# Get detailed GPU information if available
606+
if system_data["cuda_available"]
607+
gpu_info = get_cuda_gpu_info()
608+
if !isempty(gpu_info)
609+
system_data["gpu_type"] = gpu_info["gpu_type"]
610+
system_data["gpu_count"] = gpu_info["gpu_count"]
611+
system_data["gpu_memory_gb"] = gpu_info["gpu_memory_gb"]
612+
system_data["gpu_capability"] = gpu_info["gpu_capability"]
613+
if haskey(gpu_info, "gpu_types")
614+
system_data["gpu_types"] = join(gpu_info["gpu_types"], ", ")
615+
end
616+
end
617+
elseif system_data["metal_available"]
618+
metal_info = get_metal_gpu_info()
619+
if !isempty(metal_info)
620+
system_data["gpu_type"] = metal_info["gpu_type"]
621+
system_data["gpu_count"] = metal_info["gpu_count"]
622+
end
623+
end
624+
465625
# Try to detect if CUDA/Metal packages are actually loaded
466626
system_data["cuda_loaded"] = false
467627
system_data["metal_loaded"] = false

lib/LinearSolveAutotune/src/telemetry.jl

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -236,6 +236,23 @@ function format_system_info_markdown(system_info::Dict)
236236
# Handle both "has_metal" and "metal_available" keys
237237
push!(lines, "- **Metal Available**: $(get(system_info, "metal_available", get(system_info, "has_metal", false)))")
238238

239+
# GPU Information
240+
if haskey(system_info, "gpu_type")
241+
push!(lines, "- **GPU Type**: $(system_info["gpu_type"])")
242+
if haskey(system_info, "gpu_count")
243+
push!(lines, "- **GPU Count**: $(system_info["gpu_count"])")
244+
end
245+
if haskey(system_info, "gpu_memory_gb")
246+
push!(lines, "- **GPU Memory**: $(system_info["gpu_memory_gb"]) GB")
247+
end
248+
if haskey(system_info, "gpu_capability")
249+
push!(lines, "- **CUDA Capability**: $(system_info["gpu_capability"])")
250+
end
251+
if haskey(system_info, "gpu_types")
252+
push!(lines, "- **All GPU Types**: $(join(system_info["gpu_types"], ", "))")
253+
end
254+
end
255+
239256
# Add package versions section
240257
if haskey(system_info, "package_versions")
241258
push!(lines, "")

0 commit comments

Comments
 (0)