From 33ec8d30432dda274e39c8f518a462ea60ce723d Mon Sep 17 00:00:00 2001 From: ChrisRackauckas Date: Sun, 10 Aug 2025 16:01:19 -0400 Subject: [PATCH] Add GPU type detection to system information MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit enhances LinearSolveAutotune's system information collection to include detailed GPU information when CUDA or Metal GPUs are available. Changes: - Added `get_cuda_gpu_info()` function to retrieve CUDA GPU details: - GPU name/type via CUDA.name() - Number of GPUs - GPU memory in GB via CUDA.totalmem() - CUDA compute capability via CUDA.capability() - All GPU types for multi-GPU systems - Added `get_metal_gpu_info()` function to detect Metal GPUs: - Infers GPU type from CPU model (M1/M2/M3/M4) - Reports GPU count - Updated `get_system_info()` to include GPU information fields - Updated `get_detailed_system_info()` to include GPU fields - Enhanced telemetry markdown formatting to display GPU details The implementation gracefully handles missing GPU hardware or packages, returning empty information when GPUs are not available. 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude --- lib/LinearSolveAutotune/src/gpu_detection.jl | 160 +++++++++++++++++++ lib/LinearSolveAutotune/src/telemetry.jl | 17 ++ 2 files changed, 177 insertions(+) diff --git a/lib/LinearSolveAutotune/src/gpu_detection.jl b/lib/LinearSolveAutotune/src/gpu_detection.jl index 6a2ad8aac..affd2b439 100644 --- a/lib/LinearSolveAutotune/src/gpu_detection.jl +++ b/lib/LinearSolveAutotune/src/gpu_detection.jl @@ -70,6 +70,126 @@ function is_metal_available() end end +""" + get_cuda_gpu_info() + +Get information about CUDA GPU devices if available. +Returns a Dict with GPU type, count, memory, and compute capability. +""" +function get_cuda_gpu_info() + gpu_info = Dict{String, Any}() + + # Check if CUDA extension is loaded + ext = Base.get_extension(LinearSolve, :LinearSolveCUDAExt) + if ext === nothing + return gpu_info + end + + try + # Get CUDA module from the extension + CUDA = ext.CUDA + + # Check if CUDA is functional + if !CUDA.functional() + return gpu_info + end + + # Get device information + devices = collect(CUDA.devices()) + num_devices = length(devices) + + if num_devices > 0 + gpu_info["gpu_count"] = num_devices + + # Get information from the first GPU + first_device = devices[1] + gpu_info["gpu_type"] = CUDA.name(first_device) + + # Convert memory from bytes to GB + total_mem_bytes = CUDA.totalmem(first_device) + gpu_info["gpu_memory_gb"] = round(total_mem_bytes / (1024^3), digits=2) + + # Get compute capability + capability = CUDA.capability(first_device) + gpu_info["gpu_capability"] = "$(capability.major).$(capability.minor)" + + # If multiple GPUs, list all types + if num_devices > 1 + gpu_types = String[] + for dev in devices + push!(gpu_types, CUDA.name(dev)) + end + gpu_info["gpu_types"] = unique(gpu_types) + end + end + catch e + # If there's any error, return empty info + @debug "Error getting CUDA GPU info: $e" + end + + return gpu_info +end + +""" + get_metal_gpu_info() + +Get information about Metal GPU devices if available. +Returns a Dict with GPU type and count. +""" +function get_metal_gpu_info() + gpu_info = Dict{String, Any}() + + # Check if Metal extension is loaded + ext = Base.get_extension(LinearSolve, :LinearSolveMetalExt) + if ext === nothing + return gpu_info + end + + try + # Get Metal module from the extension + Metal = ext.Metal + + # Check if Metal is functional + if !Metal.functional() + return gpu_info + end + + # Get device information + # Metal typically has one device on Apple Silicon + gpu_info["gpu_count"] = 1 + + # Determine GPU type based on system architecture + if Sys.ARCH == :aarch64 + # Try to get more specific model information + cpu_model = "" + cpu_info = Sys.cpu_info() + if !isempty(cpu_info) + cpu_model = cpu_info[1].model + end + + # Infer GPU type from CPU model for Apple Silicon + if contains(lowercase(cpu_model), "m1") + gpu_info["gpu_type"] = "Apple M1 GPU" + elseif contains(lowercase(cpu_model), "m2") + gpu_info["gpu_type"] = "Apple M2 GPU" + elseif contains(lowercase(cpu_model), "m3") + gpu_info["gpu_type"] = "Apple M3 GPU" + elseif contains(lowercase(cpu_model), "m4") + gpu_info["gpu_type"] = "Apple M4 GPU" + else + gpu_info["gpu_type"] = "Apple Silicon GPU" + end + else + gpu_info["gpu_type"] = "Metal GPU" + end + catch e + # If there's any error, return empty info + @debug "Error getting Metal GPU info: $e" + end + + return gpu_info +end + """ get_system_info() @@ -126,6 +246,26 @@ function get_system_info() info["blas_vendor"] = string(LinearAlgebra.BLAS.vendor()) info["has_cuda"] = is_cuda_available() info["has_metal"] = is_metal_available() + + # Get GPU information if CUDA is available + if info["has_cuda"] + gpu_info = get_cuda_gpu_info() + if !isempty(gpu_info) + info["gpu_type"] = gpu_info["gpu_type"] + info["gpu_count"] = gpu_info["gpu_count"] + info["gpu_memory_gb"] = gpu_info["gpu_memory_gb"] + info["gpu_capability"] = gpu_info["gpu_capability"] + end + end + + # Get GPU information if Metal is available + if info["has_metal"] + metal_info = get_metal_gpu_info() + if !isempty(metal_info) + info["gpu_type"] = metal_info["gpu_type"] + info["gpu_count"] = metal_info["gpu_count"] + end + end if MKL_jll.is_available() info["mkl_available"] = true @@ -462,6 +602,26 @@ function get_detailed_system_info() system_data["metal_available"] = false end + # Get detailed GPU information if available + if system_data["cuda_available"] + gpu_info = get_cuda_gpu_info() + if !isempty(gpu_info) + system_data["gpu_type"] = gpu_info["gpu_type"] + system_data["gpu_count"] = gpu_info["gpu_count"] + system_data["gpu_memory_gb"] = gpu_info["gpu_memory_gb"] + system_data["gpu_capability"] = gpu_info["gpu_capability"] + if haskey(gpu_info, "gpu_types") + system_data["gpu_types"] = join(gpu_info["gpu_types"], ", ") + end + end + elseif system_data["metal_available"] + metal_info = get_metal_gpu_info() + if !isempty(metal_info) + system_data["gpu_type"] = metal_info["gpu_type"] + system_data["gpu_count"] = metal_info["gpu_count"] + end + end + # Try to detect if CUDA/Metal packages are actually loaded system_data["cuda_loaded"] = false system_data["metal_loaded"] = false diff --git a/lib/LinearSolveAutotune/src/telemetry.jl b/lib/LinearSolveAutotune/src/telemetry.jl index 32827ac7f..f41634d67 100644 --- a/lib/LinearSolveAutotune/src/telemetry.jl +++ b/lib/LinearSolveAutotune/src/telemetry.jl @@ -236,6 +236,23 @@ function format_system_info_markdown(system_info::Dict) # Handle both "has_metal" and "metal_available" keys push!(lines, "- **Metal Available**: $(get(system_info, "metal_available", get(system_info, "has_metal", false)))") + # GPU Information + if haskey(system_info, "gpu_type") + push!(lines, "- **GPU Type**: $(system_info["gpu_type"])") + if haskey(system_info, "gpu_count") + push!(lines, "- **GPU Count**: $(system_info["gpu_count"])") + end + if haskey(system_info, "gpu_memory_gb") + push!(lines, "- **GPU Memory**: $(system_info["gpu_memory_gb"]) GB") + end + if haskey(system_info, "gpu_capability") + push!(lines, "- **CUDA Capability**: $(system_info["gpu_capability"])") + end + if haskey(system_info, "gpu_types") + push!(lines, "- **All GPU Types**: $(join(system_info["gpu_types"], ", "))") + end + end + # Add package versions section if haskey(system_info, "package_versions") push!(lines, "")