diff --git a/app.py b/app.py
index db0f3a9..ec302cc 100644
--- a/app.py
+++ b/app.py
@@ -4,8 +4,26 @@
 
 from resemble_enhance.enhancer.inference import denoise, enhance
 
+# Device configuration with optimizations for modern GPUs
 if torch.cuda.is_available():
     device = "cuda"
+    # Enable TF32 matmul/conv kernels (Ampere and newer GPUs). This trades a
+    # little float32 precision for throughput. Prefer the `fp32_precision`
+    # API when this PyTorch build exposes it; otherwise use `allow_tf32`.
+    try:
+        _has_fp32_precision = hasattr(torch.backends.cuda.matmul, 'fp32_precision')
+    except AssertionError:
+        # hasattr() only swallows AttributeError; guard against PyTorch builds
+        # whose backend object signals unknown attributes with AssertionError.
+        _has_fp32_precision = False
+    if _has_fp32_precision:
+        torch.backends.cuda.matmul.fp32_precision = 'tf32'
+        torch.backends.cudnn.conv.fp32_precision = 'tf32'
+    else:
+        torch.backends.cuda.matmul.allow_tf32 = True
+        torch.backends.cudnn.allow_tf32 = True
+    # Enable cuDNN autotuner
+    torch.backends.cudnn.benchmark = True
 else:
     device = "cpu"
 
diff --git a/requirements.txt b/requirements.txt
index 14bca6b..be3c8f3 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,17 +1,18 @@
 celluloid>=0.2.0
-deepspeed>=0.12.4
+deepspeed>=0.14.0
 librosa>=0.10.1
 matplotlib>=3.8.1
-numpy>=1.26.2
+numpy>=1.26.2,<2.0.0
 omegaconf>=2.3.0
 pandas>=2.1.3
 ptflops>=0.7.1.2
 rich>=13.7.0
 scipy>=1.11.4
 soundfile>=0.12.1
-torch>=2.1.1
-torchaudio>=2.1.1
-torchvision>=0.16.1
+torch>=2.1.0
+torchaudio>=2.1.0
+torchvision>=0.16.0
+torchcodec>=0.1.0
 tqdm>=4.66.1
 resampy>=0.4.2
 tabulate>=0.8.10