fix: use with_global_system() in Jetson, Tenstorrent, and NVIDIA readers (#121)

inureyes · web-flow · commit 31389b524ddb · 2026-02-08T17:37:01.000+09:00
Replace per-call System::new() instances with with_global_system() to prevent file descriptor leaks in API mode (the long-running metrics loop).

Changes:
- NVIDIA Jetson (nvidia_jetson.rs):
  - Line 176: get_process_info() now uses with_global_system()
  - Line 261: get_gpu_processes() helper now uses with_global_system()
- Tenstorrent (tenstorrent.rs):
  - Line 201: get_process_info() now uses with_global_system()
- NVIDIA (nvidia.rs):
  - Removed the system: Mutex<System> field from the struct
  - Migrated get_process_info() to use with_global_system() for consistency

This follows the standard pattern already used by the AMD reader, the Apple Silicon reader, and the local collector.

Fixes #120
diff --git a/src/device/readers/nvidia.rs b/src/device/readers/nvidia.rs
@@ -18,14 +18,13 @@ use crate::device::process_list::{get_all_processes, merge_gpu_processes};
 use crate::device::readers::common_cache::{DetailBuilder, DeviceStaticInfo, MAX_DEVICES};
 use crate::device::types::{GpuInfo, ProcessInfo};
 use crate::device::GpuReader;
-use crate::utils::get_hostname;
+use crate::utils::{get_hostname, with_global_system};
 use chrono::Local;
 use nvml_wrapper::enums::device::UsedGpuMemory;
 use nvml_wrapper::error::NvmlError;
 use nvml_wrapper::{cuda_driver_version_major, cuda_driver_version_minor, Nvml};
 use std::collections::{HashMap, HashSet};
 use std::sync::{Mutex, OnceLock};
-use sysinfo::System;
 
 // Global status for NVML error messages
 static NVML_STATUS: Mutex<Option<String>> = Mutex::new(None);
@@ -39,8 +38,6 @@ pub struct NvidiaGpuReader {
     device_static_info: OnceLock<HashMap<u32, DeviceStaticInfo>>,
     /// Cached NVML handle (initialized once, reused across calls)
     nvml: Mutex<Option<Nvml>>,
-    /// Cached System instance for process info (reused across calls)
-    system: Mutex<System>,
 }
 
 impl Default for NvidiaGpuReader {
@@ -56,7 +53,6 @@ impl NvidiaGpuReader {
             cuda_version: OnceLock::new(),
             device_static_info: OnceLock::new(),
             nvml: Mutex::new(Nvml::init().ok()),
-            system: Mutex::new(System::new()),
         }
     }
 
@@ -231,20 +227,21 @@ impl GpuReader for NvidiaGpuReader {
     fn get_process_info(&self) -> Vec<ProcessInfo> {
         use sysinfo::{ProcessRefreshKind, ProcessesToUpdate, UpdateKind};
 
-        // Reuse the cached System instance
-        let mut system = self.system.lock().unwrap_or_else(|e| e.into_inner());
-        system.refresh_processes_specifics(
-            ProcessesToUpdate::All,
-            true,
-            ProcessRefreshKind::everything().with_user(UpdateKind::Always),
-        );
-        system.refresh_memory();
-
         // Get GPU processes and PIDs using cached NVML handle
         let (gpu_processes, gpu_pids) = self.get_gpu_processes_cached();
 
-        // Get all system processes
-        let mut all_processes = get_all_processes(&system, &gpu_pids);
+        // Use global system instance to avoid file descriptor leak
+        let mut all_processes = with_global_system(|system| {
+            system.refresh_processes_specifics(
+                ProcessesToUpdate::All,
+                true,
+                ProcessRefreshKind::everything().with_user(UpdateKind::Always),
+            );
+            system.refresh_memory();
+
+            // Get all system processes
+            get_all_processes(system, &gpu_pids)
+        });
 
         // Merge GPU information into the process list
         merge_gpu_processes(&mut all_processes, gpu_processes);
diff --git a/src/device/readers/nvidia_jetson.rs b/src/device/readers/nvidia_jetson.rs
@@ -17,12 +17,11 @@ use crate::device::process_list::{get_all_processes, merge_gpu_processes};
 use crate::device::readers::common_cache::{DetailBuilder, DeviceStaticInfo};
 use crate::device::types::{GpuInfo, ProcessInfo};
 use crate::device::GpuReader;
-use crate::utils::{get_hostname, hz_to_mhz, millicelsius_to_celsius};
+use crate::utils::{get_hostname, hz_to_mhz, millicelsius_to_celsius, with_global_system};
 use chrono::Local;
 use std::collections::HashSet;
 use std::fs;
 use std::sync::OnceLock;
-use sysinfo::System;
 
 pub struct NvidiaJetsonGpuReader {
     /// Cached static device information (fetched only once)
@@ -171,22 +170,23 @@ impl GpuReader for NvidiaJetsonGpuReader {
     }
 
     fn get_process_info(&self) -> Vec<ProcessInfo> {
-        // Create a lightweight system instance and only refresh what we need
         use sysinfo::{ProcessRefreshKind, ProcessesToUpdate, UpdateKind};
-        let mut system = System::new();
-        // Refresh processes with user information
-        system.refresh_processes_specifics(
-            ProcessesToUpdate::All,
-            true,
-            ProcessRefreshKind::everything().with_user(UpdateKind::Always),
-        );
-        system.refresh_memory();
 
         // Get GPU processes and PIDs
         let (gpu_processes, gpu_pids) = get_gpu_processes();
 
-        // Get all system processes
-        let mut all_processes = get_all_processes(&system, &gpu_pids);
+        // Use global system instance to avoid file descriptor leak
+        let mut all_processes = with_global_system(|system| {
+            system.refresh_processes_specifics(
+                ProcessesToUpdate::All,
+                true,
+                ProcessRefreshKind::everything().with_user(UpdateKind::Always),
+            );
+            system.refresh_memory();
+
+            // Get all system processes
+            get_all_processes(system, &gpu_pids)
+        });
 
         // Merge GPU information into the process list
         merge_gpu_processes(&mut all_processes, gpu_processes);
@@ -258,41 +258,42 @@ fn get_gpu_processes() -> (Vec<ProcessInfo>, HashSet<u32>) {
             "cuda",
         ];
 
-        let mut system = System::new();
-        system.refresh_memory();
-        for (pid, process) in system.processes() {
-            let process_name = process.name().to_string_lossy().to_lowercase();
-            for gpu_name in &gpu_process_names {
-                if process_name.contains(gpu_name) {
-                    let pid_u32 = pid.as_u32();
-                    gpu_pids.insert(pid_u32);
-
-                    gpu_processes.push(ProcessInfo {
-                        device_id: 0,
-                        device_uuid: "JetsonGPU".to_string(),
-                        pid: pid_u32,
-                        process_name: String::new(), // Will be filled by sysinfo
-                        used_memory: 0, // Can't determine GPU memory usage without nvidia-smi
-                        cpu_percent: 0.0, // Will be filled by sysinfo
-                        memory_percent: 0.0, // Will be filled by sysinfo
-                        memory_rss: 0,  // Will be filled by sysinfo
-                        memory_vms: 0,  // Will be filled by sysinfo
-                        user: String::new(), // Will be filled by sysinfo
-                        state: String::new(), // Will be filled by sysinfo
-                        start_time: String::new(), // Will be filled by sysinfo
-                        cpu_time: 0,    // Will be filled by sysinfo
-                        command: String::new(), // Will be filled by sysinfo
-                        ppid: 0,        // Will be filled by sysinfo
-                        threads: 0,     // Will be filled by sysinfo
-                        uses_gpu: true,
-                        priority: 0,          // Will be filled by sysinfo
-                        nice_value: 0,        // Will be filled by sysinfo
-                        gpu_utilization: 0.0, // Can't determine per-process GPU utilization
-                    });
-                    break;
+        with_global_system(|system| {
+            system.refresh_memory();
+            for (pid, process) in system.processes() {
+                let process_name = process.name().to_string_lossy().to_lowercase();
+                for gpu_name in &gpu_process_names {
+                    if process_name.contains(gpu_name) {
+                        let pid_u32 = pid.as_u32();
+                        gpu_pids.insert(pid_u32);
+
+                        gpu_processes.push(ProcessInfo {
+                            device_id: 0,
+                            device_uuid: "JetsonGPU".to_string(),
+                            pid: pid_u32,
+                            process_name: String::new(), // Will be filled by sysinfo
+                            used_memory: 0, // Can't determine GPU memory usage without nvidia-smi
+                            cpu_percent: 0.0, // Will be filled by sysinfo
+                            memory_percent: 0.0, // Will be filled by sysinfo
+                            memory_rss: 0,  // Will be filled by sysinfo
+                            memory_vms: 0,  // Will be filled by sysinfo
+                            user: String::new(), // Will be filled by sysinfo
+                            state: String::new(), // Will be filled by sysinfo
+                            start_time: String::new(), // Will be filled by sysinfo
+                            cpu_time: 0,    // Will be filled by sysinfo
+                            command: String::new(), // Will be filled by sysinfo
+                            ppid: 0,        // Will be filled by sysinfo
+                            threads: 0,     // Will be filled by sysinfo
+                            uses_gpu: true,
+                            priority: 0,          // Will be filled by sysinfo
+                            nice_value: 0,        // Will be filled by sysinfo
+                            gpu_utilization: 0.0, // Can't determine per-process GPU utilization
+                        });
+                        break;
+                    }
                 }
             }
-        }
+        });
     }
 
     (gpu_processes, gpu_pids)
diff --git a/src/device/readers/tenstorrent.rs b/src/device/readers/tenstorrent.rs
@@ -16,7 +16,7 @@ use crate::device::process_list::{get_all_processes, merge_gpu_processes};
 use crate::device::readers::common_cache::{DetailBuilder, DeviceStaticInfo};
 use crate::device::types::{GpuInfo, ProcessInfo};
 use crate::device::GpuReader;
-use crate::utils::get_hostname;
+use crate::utils::{get_hostname, with_global_system};
 use all_smi_luwen_core;
 use all_smi_luwen_if::chip::{Chip, ChipImpl, Telemetry};
 use all_smi_luwen_if::ChipDetectOptions;
@@ -25,7 +25,6 @@ use chrono::Local;
 use once_cell::sync::Lazy;
 use std::collections::{HashMap, HashSet};
 use std::sync::Mutex;
-use sysinfo::System;
 
 /// Collection method for Tenstorrent NPU metrics
 #[derive(Debug, Clone, Copy)]
@@ -196,21 +195,23 @@ impl GpuReader for TenstorrentReader {
     }
 
     fn get_process_info(&self) -> Vec<ProcessInfo> {
-        // Create system instance and refresh processes
         use sysinfo::{ProcessRefreshKind, ProcessesToUpdate, UpdateKind};
-        let mut system = System::new();
-        system.refresh_processes_specifics(
-            ProcessesToUpdate::All,
-            true,
-            ProcessRefreshKind::everything().with_user(UpdateKind::Always),
-        );
-        system.refresh_memory();
 
         // Get NPU processes (currently empty for Tenstorrent)
         let (npu_processes, npu_pids) = self.get_npu_processes();
 
-        // Get all system processes
-        let mut all_processes = get_all_processes(&system, &npu_pids);
+        // Use global system instance to avoid file descriptor leak
+        let mut all_processes = with_global_system(|system| {
+            system.refresh_processes_specifics(
+                ProcessesToUpdate::All,
+                true,
+                ProcessRefreshKind::everything().with_user(UpdateKind::Always),
+            );
+            system.refresh_memory();
+
+            // Get all system processes
+            get_all_processes(system, &npu_pids)
+        });
 
         // Merge NPU information
         merge_gpu_processes(&mut all_processes, npu_processes);