Skip to content

Commit 33e9502

Browse files
committed
Enhance Groq Whisper integration with proxy handling and improved logging
- Implemented low-level monkey patching for httpx to resolve proxy issues during Groq client initialization.
- Added detailed logging for startup, client initialization, and transcription processes to aid debugging.
- Updated transcription logic to handle different API response formats and ensure compatibility with various Groq client versions.
- Enhanced test suite with integration tests and improved environment variable management for API key configuration.
- Documented integration testing procedures in README.md, including notes on running tests with real API keys.
1 parent 0154a4a commit 33e9502

File tree

3 files changed

+592
-36
lines changed

3 files changed

+592
-36
lines changed

server/links/groq_whisper/README.md

Lines changed: 27 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -70,16 +70,40 @@ result = run(
7070

7171
## Testing
7272

73-
To run the tests:
73+
To run the unit tests (with mocked API):
7474

7575
```bash
76-
# Set a dummy API key for testing
76+
# Optional: set a dummy API key for testing (the test suite sets one automatically if it is missing)
7777
export GROQ_API_KEY=test_api_key_for_testing
7878

7979
# Run the tests
80-
pytest server/links/groq_whisper/test_groq_whisper.py -v
80+
poetry run pytest server/links/groq_whisper/test_groq_whisper.py -v
8181
```
8282

83+
### Integration Testing
84+
85+
The test suite also includes integration tests that make real API calls to Groq. They run only when a valid API key is available; they are skipped when `GROQ_API_KEY` is not provided or is set to the test placeholder.
86+
87+
To run the integration tests:
88+
89+
```bash
90+
# Set your real Groq API key
91+
export GROQ_API_KEY=your_actual_groq_api_key
92+
93+
# Run just the integration tests
94+
poetry run python -m server.links.groq_whisper.test_groq_whisper
95+
96+
# Or run all tests including integration tests
97+
poetry run pytest server/links/groq_whisper/test_groq_whisper.py -v
98+
```
99+
100+
**Important Notes:**
101+
- The GROQ_API_KEY environment variable must be set **before** running the tests
102+
- If the test output shows "Groq API key not configured", your key was not recognized
103+
- The key may not be recognized if it was set in a different shell session or only after the tests had been started
104+
- Integration tests generate synthetic audio samples, which might not yield meaningful transcriptions
105+
- Running integration tests will use your Groq API quota and may incur charges
106+
83107
## Response Format
84108

85109
The Groq Whisper API returns transcription results in the following format:

server/links/groq_whisper/__init__.py

Lines changed: 181 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,60 @@
1111
import tempfile
1212
import time
1313
import os
14+
import sys
1415
from typing import Optional, Dict, Any, Union
15-
16+
import importlib
17+
18+
# -----------------------------------------------------------------------------
19+
# CRITICAL: Low-level monkey patching to resolve proxy issues
20+
# -----------------------------------------------------------------------------
21+
# The issue we're encountering is that httpx is picking up proxy settings from somewhere,
22+
# and they're being injected into the Groq client initialization.
23+
# We need to patch httpx before any Groq imports happen.
24+
25+
# Setup minimal logging for startup
26+
startup_logger = logging.getLogger("server.links.groq_whisper.startup")
27+
startup_logger.setLevel(logging.INFO)
28+
handler = logging.StreamHandler()
29+
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
30+
handler.setFormatter(formatter)
31+
startup_logger.addHandler(handler)
32+
33+
# Clear proxy environment variables
34+
proxy_env_vars = ['HTTP_PROXY', 'HTTPS_PROXY', 'NO_PROXY', 'http_proxy', 'https_proxy', 'no_proxy']
35+
for var in proxy_env_vars:
36+
if var in os.environ:
37+
startup_logger.warning(f"Unsetting proxy environment variable: {var}")
38+
del os.environ[var]
39+
40+
# Try to import and patch httpx before any Groq imports happen
41+
try:
42+
import httpx
43+
44+
# Store original Client class
45+
OriginalClient = httpx.Client
46+
47+
# Create a patched Client class
48+
class PatchedClient(OriginalClient):
    """Subclass of the original client class that ignores proxy keyword arguments."""

    def __init__(self, *args, **kwargs):
        """Strip ``proxies`` / ``proxy`` from *kwargs*, then defer to the parent initializer.

        A warning is logged on ``startup_logger`` for each keyword that is stripped.
        """
        # Collect the offending keywords first so kwargs is not mutated while scanning it.
        stripped = [name for name in ('proxies', 'proxy') if name in kwargs]
        for name in stripped:
            startup_logger.warning(f"Removing '{name}' from httpx.Client initialization")
            del kwargs[name]
        # Everything else is forwarded untouched.
        super().__init__(*args, **kwargs)
57+
58+
# Replace the httpx.Client with our patched version
59+
httpx.Client = PatchedClient
60+
startup_logger.info("Successfully patched httpx.Client to ignore proxy settings")
61+
62+
except ImportError:
63+
startup_logger.warning("Could not import httpx for patching")
64+
except Exception as e:
65+
startup_logger.error(f"Failed to patch httpx: {e}")
66+
67+
# Now we can safely import the rest of the dependencies
1668
import requests
1769
from tenacity import (
1870
RetryError,
@@ -21,6 +73,8 @@
2173
stop_after_attempt,
2274
wait_exponential,
2375
)
76+
77+
# Import Groq client - should now be safe with patched httpx
2478
from groq import Groq
2579

2680
from lib.error_tracking import init_error_tracker
@@ -123,20 +177,78 @@ def transcribe_groq_whisper(dialog: dict, opts: dict) -> Union[Dict[str, Any], A
123177
temp_file.write(content)
124178
temp_file.flush()
125179

126-
# Initialize Groq client with the API key
127-
client = Groq(api_key=opts['API_KEY'])
180+
# Initialize Groq client with API key
181+
api_key = opts['API_KEY']
182+
client = Groq(api_key=api_key)
128183

129-
# Open the audio file for the API request
130-
with open(temp_file.name, 'rb') as audio_file:
131-
# Make the transcription request using the Groq client
132-
response = client.audio.transcriptions.create(
133-
file=audio_file,
134-
model="distil-whisper-large-v3-en",
135-
response_format="verbose_json"
136-
)
184+
# Log client initialization
185+
logger.info(f"Initialized Groq client with version: {getattr(client, '__version__', 'unknown')}")
186+
187+
# Get file name for the API request
188+
file_name = temp_file.name
189+
logger.debug(f"Using temporary file: {file_name}")
190+
191+
# Log available client attributes to help debugging
192+
logger.debug(f"Groq client attributes: {dir(client)}")
193+
194+
# Check for audio transcription capabilities
195+
if hasattr(client, 'audio') and hasattr(client.audio, 'transcriptions'):
196+
logger.info("Using client.audio.transcriptions API")
197+
# Open the audio file for the API request
198+
with open(file_name, 'rb') as audio_file:
199+
# Make the transcription request using the Groq client
200+
response = client.audio.transcriptions.create(
201+
file=(file_name, audio_file.read()),
202+
model="whisper-large-v3-turbo", # Updated model name
203+
response_format="json"
204+
)
205+
206+
# Return the response
207+
return response
208+
elif hasattr(client, 'transcriptions'):
209+
logger.info("Using client.transcriptions API")
210+
# Alternative API structure
211+
with open(file_name, 'rb') as audio_file:
212+
response = client.transcriptions.create(
213+
file=(file_name, audio_file.read()),
214+
model="whisper-large-v3-turbo",
215+
response_format="json"
216+
)
217+
return response
218+
else:
219+
# Fallback for older API versions
220+
logger.warning("Could not find audio transcription API in Groq client. Using audio request directly.")
221+
222+
# Create custom request to the Groq API endpoint directly
223+
url = "https://api.groq.com/openai/v1/audio/transcriptions"
224+
headers = {
225+
"Authorization": f"Bearer {api_key}"
226+
}
227+
228+
with open(file_name, 'rb') as audio_file:
229+
files = {
230+
"file": (file_name, audio_file, "audio/flac")
231+
}
232+
data = {
233+
"model": "whisper-large-v3-turbo",
234+
"response_format": "json"
235+
}
236+
237+
response = requests.post(url, headers=headers, files=files, data=data)
238+
response.raise_for_status() # Raise exception for HTTP errors
239+
240+
# Parse the JSON response
241+
result = response.json()
242+
243+
# Create a simple object with text attribute to match API
244+
class TranscriptionResult:
245+
def __init__(self, text):
246+
self.text = text
247+
248+
return TranscriptionResult(result.get("text", ""))
137249

138-
# Return the response (could be a dict or an object depending on Groq library version)
139-
return response
250+
# Return the response (could be a dict or an object depending on Groq library version)
251+
return response
140252

141253

142254
def run(
@@ -167,6 +279,23 @@ def run(
167279
opts = merged_opts
168280

169281
logger.info("Starting whisper plugin for vCon: %s", vcon_uuid)
282+
283+
# Add enhanced logging for debugging
284+
logger.debug(f"Python version: {sys.version}")
285+
logger.debug(f"Environment: {[(k, v) for k, v in os.environ.items() if 'proxy' in k.lower()]}")
286+
287+
# Log versions of key dependencies if available
288+
try:
289+
import groq
290+
logger.info(f"Groq version: {getattr(groq, '__version__', 'unknown')}")
291+
except ImportError:
292+
logger.warning("Groq package not available for version checking")
293+
294+
try:
295+
import httpx
296+
logger.info(f"httpx version: {httpx.__version__}")
297+
except ImportError:
298+
logger.debug("httpx not available for version checking")
170299

171300
vcon_redis = VconRedis()
172301
vCon = vcon_redis.get_vcon(vcon_uuid)
@@ -221,18 +350,48 @@ def run(
221350
break
222351

223352
logger.info("Transcribed vCon: %s", vCon.uuid)
224-
logger.info(result)
353+
logger.info(f"Transcription result type: {type(result)}")
354+
logger.info(f"Transcription result attributes: {dir(result)}")
355+
356+
# Check if result is a successful transcription
357+
if not hasattr(result, 'text'):
358+
logger.warning(f"Unexpected result format: {result}")
359+
stats_count("conserver.link.groq_whisper.transcription_failures")
360+
break
225361

226362
# Handle different response formats from the Groq API
227363
# The result could be a dict, an object with model_dump method, or something else
228-
transcription_data = result
229-
if hasattr(result, 'model_dump'):
230-
transcription_data = result.model_dump()
231-
elif not isinstance(result, dict):
232-
transcription_data = {
233-
"text": str(result),
234-
"raw_response": str(result)
235-
}
364+
try:
365+
# First log the raw text
366+
logger.info(f"Transcription text: {result.text}")
367+
368+
# Try to convert to a standard format
369+
transcription_data = None
370+
if hasattr(result, 'model_dump'):
371+
# For pydantic models
372+
transcription_data = result.model_dump()
373+
elif hasattr(result, '__dict__'):
374+
# For custom objects with __dict__
375+
transcription_data = vars(result)
376+
elif isinstance(result, dict):
377+
# Already a dict
378+
transcription_data = result
379+
else:
380+
# Fallback to a simple dict with text
381+
transcription_data = {
382+
"text": str(result.text),
383+
"raw_response": str(result)
384+
}
385+
386+
# Ensure text is included
387+
if "text" not in transcription_data and hasattr(result, 'text'):
388+
transcription_data["text"] = result.text
389+
390+
logger.info(f"Processed transcription data: {transcription_data}")
391+
except Exception as e:
392+
logger.error(f"Error processing transcription result: {e}")
393+
# Fallback to a very simple format
394+
transcription_data = {"text": str(getattr(result, 'text', result))}
236395

237396
# Prepare vendor schema without sensitive data
238397
vendor_schema = {

0 commit comments

Comments
 (0)