Merge pull request #4 from aj47/fix/nvidia-detection-cache

aj47 · web-flow · commit 5577b7d220db · 2025-06-12T11:05:45.000+01:00
Fix: Cache NVIDIA detection to prevent repeated torch downloads on Windows
diff --git a/NVIDIA_CACHE_FIX.md b/NVIDIA_CACHE_FIX.md
@@ -0,0 +1,111 @@
+# NVIDIA Detection Cache Fix
+
+## Problem
+
+On Windows development environments, tests were taking very long due to repeated downloads of the 2.2GB+ torch package. This was caused by:
+
+1. **Inconsistent NVIDIA Detection**: The `has_nvidia_smi()` function was returning different values between runs
+2. **Dynamic PyProject Generation**: Multiple modules generate different `pyproject.toml` content based on NVIDIA GPU detection
+3. **uv-iso-env Behavior**: The `uv-iso-env` package performs a "nuke and pave" reinstall whenever the `pyproject.toml` fingerprint changes
+4. **Repeated Downloads**: Each fingerprint change triggered a complete reinstall including the large torch download
+
+## Root Cause
+
+The issue was that `has_nvidia_smi()` was being called multiple times during test runs, and on Windows systems, the detection could be inconsistent due to:
+- System state changes
+- Process timing issues
+- Environment variable changes
+- Path resolution inconsistencies
+
+This caused different `pyproject.toml` content to be generated between runs, changing the fingerprint and triggering reinstalls.
+
+## Solution
+
+### 1. NVIDIA Detection Caching
+
+Enhanced `has_nvidia_smi()` in `src/transcribe_anything/util.py` to:
+- Cache detection results based on system fingerprint
+- Store cache in `~/.transcribe_anything_nvidia_cache.json`
+- Use system information (platform, machine, version) + nvidia-smi existence as fingerprint
+- Provide consistent results across runs for the same system configuration
+
+### 2. Debug Logging
+
+Added debug logging to environment generation functions:
+- `src/transcribe_anything/whisper.py`
+- `src/transcribe_anything/insanley_fast_whisper_reqs.py`
+- `src/transcribe_anything/whisper_mac.py`
+
+Each now logs the MD5 hash of generated `pyproject.toml` content to help track changes.
+
+### 3. Cache Management
+
+Added command-line option to clear cache when needed:
+```bash
+transcribe-anything --clear-nvidia-cache
+```
+
+### 4. Testing
+
+Created comprehensive tests in `tests/test_nvidia_cache.py` to verify:
+- Caching behavior works correctly
+- Cache clearing functionality
+- Different system fingerprints are handled properly
+
+## Files Modified
+
+- `src/transcribe_anything/util.py` - Enhanced NVIDIA detection with caching
+- `src/transcribe_anything/whisper.py` - Added debug logging
+- `src/transcribe_anything/insanley_fast_whisper_reqs.py` - Added debug logging  
+- `src/transcribe_anything/whisper_mac.py` - Added debug logging
+- `src/transcribe_anything/_cmd.py` - Added clear cache command-line option
+- `tests/test_nvidia_cache.py` - New test file for cache functionality
+
+## Usage
+
+### Normal Operation
+The caching is automatic and transparent. The first run will detect NVIDIA availability and cache the result. Subsequent runs will use the cached result, ensuring consistent `pyproject.toml` generation.
+
+### Debugging
+If you suspect caching issues, you can:
+
+1. **View debug output**: The system will print debug messages showing:
+   - Cached vs fresh NVIDIA detection results
+   - PyProject.toml content hashes for each module
+
+2. **Clear cache**: If hardware changes or you need to force re-detection:
+   ```bash
+   transcribe-anything --clear-nvidia-cache
+   ```
+
+### Expected Behavior
+- **First run**: Detects NVIDIA, caches result, generates environment
+- **Subsequent runs**: Uses cached result, generates identical environment
+- **No more repeated downloads**: Same fingerprint = no reinstall needed
+
+## Benefits
+
+1. **Faster Testing**: Eliminates repeated 2.2GB+ torch downloads
+2. **Consistent Behavior**: Same system configuration always produces same results
+3. **Debuggable**: Clear logging shows what's happening
+4. **Manageable**: Easy cache clearing when needed
+5. **Backward Compatible**: No changes to existing API or behavior
+
+## Technical Details
+
+The cache file (`~/.transcribe_anything_nvidia_cache.json`) stores mappings from system fingerprints to detection results:
+
+```json
+{
+  "Windows-AMD64-10.0.19041-nvidia_smi:true": true,
+  "Linux-x86_64-5.4.0-nvidia_smi:false": false
+}
+```
+
+The system fingerprint includes:
+- Platform system (Windows, Linux, Darwin)
+- Machine architecture (AMD64, x86_64, arm64)
+- Platform version
+- Whether nvidia-smi executable exists
+
+This ensures that hardware or driver changes are properly detected while maintaining consistency for the same configuration.
diff --git a/src/transcribe_anything/_cmd.py b/src/transcribe_anything/_cmd.py
@@ -72,6 +72,11 @@ def parse_arguments() -> argparse.Namespace:
         help=("Query the GPU and store it in the given path," " warning takes a long time on first load!"),
         type=Path,
     )
+    parser.add_argument(
+        "--clear-nvidia-cache",
+        help="Clear the NVIDIA detection cache to force re-detection",
+        action="store_true",
+    )
     parser.add_argument(
         "--output_dir",
         help="Provide output directory name,d efaults to the filename of the file.",
@@ -144,7 +149,7 @@ def parse_arguments() -> argparse.Namespace:
     )
     # add extra options that are passed into the transcribe function
     args, unknown = parser.parse_known_args()
-    if args.url_or_file is None and args.query_gpu_json_path is None:
+    if args.url_or_file is None and args.query_gpu_json_path is None and not getattr(args, 'clear_nvidia_cache', False):
         print("No file or url provided")
         parser.print_help()
         sys.exit(1)
@@ -173,6 +178,14 @@ def main() -> int:
     """Main entry point for the command line tool."""
     args = parse_arguments()
     unknown = args.unknown
+
+    # Handle clear NVIDIA cache option
+    if getattr(args, 'clear_nvidia_cache', False):
+        from transcribe_anything.util import clear_nvidia_cache
+        clear_nvidia_cache()
+        print("NVIDIA detection cache cleared successfully.")
+        return 0
+
     if args.query_gpu_json_path is not None:
         from transcribe_anything.insanely_fast_whisper import get_cuda_info
 
diff --git a/src/transcribe_anything/insanley_fast_whisper_reqs.py b/src/transcribe_anything/insanley_fast_whisper_reqs.py
@@ -3,6 +3,7 @@
 """
 
 import sys
+import hashlib
 from pathlib import Path
 
 from iso_env import IsoEnv, IsoEnvArgs, PyProjectToml  # type: ignore
@@ -254,6 +255,11 @@ def get_environment(has_nvidia: bool | None = None) -> IsoEnv:
         content_lines.append("explicit = true")
 
     content = "\n".join(content_lines)
+
+    # Debug: Log the pyproject.toml content hash to track changes
+    content_hash = hashlib.md5(content.encode('utf-8')).hexdigest()[:8]
+    print(f"Debug: insanley_fast_whisper_reqs.py pyproject.toml hash: {content_hash}, has_nvidia: {has_nvidia}, is_windows: {is_windows}", file=sys.stderr)
+
     build_info = PyProjectToml(content)
     args = IsoEnvArgs(venv_path=venv_dir, build_info=build_info)
     env = IsoEnv(args)
diff --git a/src/transcribe_anything/util.py b/src/transcribe_anything/util.py
@@ -8,9 +8,16 @@
 import shutil
 from html import unescape
 from urllib.parse import unquote
+from pathlib import Path
+import json
+import sys
 
 PROCESS_TIMEOUT = 4 * 60 * 60
 
+# Cache file for NVIDIA detection to ensure consistency across runs
+_NVIDIA_CACHE_FILE = Path.home() / ".transcribe_anything_nvidia_cache.json"
+_NVIDIA_DETECTION_CACHE = None
+
 
 def is_mac_arm() -> bool:
     """Returns true if mac arm like m1, m2, etc."""
@@ -62,6 +69,75 @@ def chop_double_extension(path_name) -> str:
     return ".".join(parts + [ext])
 
 
+def _get_system_fingerprint() -> str:
+    """Get a fingerprint of the system to detect hardware changes."""
+    # Include platform info and check for nvidia-smi existence
+    platform_info = f"{platform.system()}-{platform.machine()}-{platform.version()}"
+    nvidia_smi_exists = shutil.which("nvidia-smi") is not None
+    return f"{platform_info}-nvidia_smi:{nvidia_smi_exists}"
+
+
+def _load_nvidia_cache() -> dict:
+    """Load the NVIDIA detection cache from disk."""
+    try:
+        if _NVIDIA_CACHE_FILE.exists():
+            with open(_NVIDIA_CACHE_FILE, 'r', encoding='utf-8') as f:
+                return json.load(f)
+    except (json.JSONDecodeError, OSError) as e:
+        print(f"Warning: Failed to load NVIDIA cache: {e}", file=sys.stderr)
+    return {}
+
+
+def _save_nvidia_cache(cache_data: dict) -> None:
+    """Save the NVIDIA detection cache to disk."""
+    try:
+        with open(_NVIDIA_CACHE_FILE, 'w', encoding='utf-8') as f:
+            json.dump(cache_data, f, indent=2)
+    except OSError as e:
+        print(f"Warning: Failed to save NVIDIA cache: {e}", file=sys.stderr)
+
+
 def has_nvidia_smi() -> bool:
-    """Returns True if nvidia-smi is installed."""
-    return shutil.which("nvidia-smi") is not None
+    """
+    Returns True if nvidia-smi is installed.
+
+    This function caches the result based on system fingerprint to ensure
+    consistency across runs and avoid triggering unnecessary reinstalls
+    in uv-iso-env environments.
+    """
+    global _NVIDIA_DETECTION_CACHE
+
+    # Get current system fingerprint
+    current_fingerprint = _get_system_fingerprint()
+
+    # Load cache if not already loaded
+    if _NVIDIA_DETECTION_CACHE is None:
+        _NVIDIA_DETECTION_CACHE = _load_nvidia_cache()
+
+    # Check if we have a cached result for this system fingerprint
+    if current_fingerprint in _NVIDIA_DETECTION_CACHE:
+        cached_result = _NVIDIA_DETECTION_CACHE[current_fingerprint]
+        print(f"Debug: Using cached NVIDIA detection result: {cached_result} for fingerprint: {current_fingerprint}", file=sys.stderr)
+        return cached_result
+
+    # Perform actual detection
+    nvidia_available = shutil.which("nvidia-smi") is not None
+
+    # Cache the result
+    _NVIDIA_DETECTION_CACHE[current_fingerprint] = nvidia_available
+    _save_nvidia_cache(_NVIDIA_DETECTION_CACHE)
+
+    print(f"Debug: Detected NVIDIA availability: {nvidia_available} for fingerprint: {current_fingerprint}", file=sys.stderr)
+    return nvidia_available
+
+
+def clear_nvidia_cache() -> None:
+    """Clear the NVIDIA detection cache. Useful for testing or when hardware changes."""
+    global _NVIDIA_DETECTION_CACHE
+    _NVIDIA_DETECTION_CACHE = None
+    try:
+        if _NVIDIA_CACHE_FILE.exists():
+            _NVIDIA_CACHE_FILE.unlink()
+            print("NVIDIA detection cache cleared.", file=sys.stderr)
+    except OSError as e:
+        print(f"Warning: Failed to clear NVIDIA cache: {e}", file=sys.stderr)
diff --git a/src/transcribe_anything/whisper.py b/src/transcribe_anything/whisper.py
@@ -5,6 +5,7 @@
 import subprocess
 import sys
 import time
+import hashlib
 from pathlib import Path
 from typing import Optional
 
@@ -67,6 +68,11 @@ def get_environment() -> IsoEnv:
     # else:
     #     deps.append(f"torch=={TENSOR_VERSION}")
     content = "\n".join(content_lines)
+
+    # Debug: Log the pyproject.toml content hash to track changes
+    content_hash = hashlib.md5(content.encode('utf-8')).hexdigest()[:8]
+    print(f"Debug: whisper.py pyproject.toml hash: {content_hash}, needs_extra_index: {needs_extra_index}", file=sys.stderr)
+
     pyproject_toml = PyProjectToml(content)
     args = IsoEnvArgs(venv_dir, build_info=pyproject_toml)
     env = IsoEnv(args)
diff --git a/src/transcribe_anything/whisper_mac.py b/src/transcribe_anything/whisper_mac.py
@@ -5,6 +5,7 @@
 import json
 import os
 import sys
+import hashlib
 from pathlib import Path
 from typing import Any, Dict, Optional
 
@@ -46,6 +47,11 @@ def get_environment() -> IsoEnv:
     content_lines.append('  "numpy",')
     content_lines.append("]")
     content = "\n".join(content_lines)
+
+    # Debug: Log the pyproject.toml content hash to track changes
+    content_hash = hashlib.md5(content.encode('utf-8')).hexdigest()[:8]
+    print(f"Debug: whisper_mac.py pyproject.toml hash: {content_hash}", file=sys.stderr)
+
     pyproject_toml = PyProjectToml(content)
     args = IsoEnvArgs(venv_dir, build_info=pyproject_toml)
     env = IsoEnv(args)
diff --git a/test_nvidia_fix.py b/test_nvidia_fix.py
diff --git a/tests/test_nvidia_cache.py b/tests/test_nvidia_cache.py