2 changes: 1 addition & 1 deletion .github/workflows/pr.yml
@@ -9,7 +9,7 @@ concurrency:
 
 jobs:
   lint:
-    runs-on: ubuntu-20.04
+    runs-on: ubuntu-latest
     steps:
      - uses: actions/checkout@v4
      - uses: ./.github/actions/setup-python/
2 changes: 1 addition & 1 deletion llama-cpp-server/README.md
@@ -23,4 +23,4 @@ cd llama.cpp
 docker build -t local/llama.cpp:server-cuda --target server -f .devops/cuda.Dockerfile .
 ```
 
-You can then push this image to a container registry of your choice and then replace the base_image in the config.yaml
+You can then push this image to a container registry of your choice and then replace the base_image in the config.yaml
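For context, the pushed image is what `base_image` in config.yaml should point at. A minimal sketch, where `registry.example.com/llama.cpp:server-cuda` is a hypothetical placeholder for your own registry and tag:

```
# Sketch only: the image reference below is a placeholder, not a published image.
base_image:
  image: registry.example.com/llama.cpp:server-cuda
```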
2 changes: 1 addition & 1 deletion llama-cpp-server/config.yaml
@@ -1,6 +1,6 @@
 base_image:
   image: alphatozeta/llama-cpp-server:0.4
-build_commands: 
+build_commands:
 - pip install git+https://github.com/huggingface/transformers.git hf-xet
 model_metadata:
   repo_id: google/gemma-3-27b-it-qat-q4_0-gguf
3 changes: 1 addition & 2 deletions orpheus-best-performance/model/model.py
@@ -3,7 +3,6 @@
 import torch
 import fastapi
 from snac import SNAC
-import struct
 from pathlib import Path
 import numpy as np
 from fastapi.responses import StreamingResponse
@@ -276,7 +275,7 @@ async def predict(
 
         async def audio_stream(req_id: str):
             token_gen = await self._engine.predict(model_input, request)
-
+
             if isinstance(token_gen, StreamingResponse):
                 token_gen = token_gen.body_iterator
 