|
11 | 11 | import tempfile |
12 | 12 | import time |
13 | 13 | import os |
| 14 | +import sys |
14 | 15 | from typing import Optional, Dict, Any, Union |
15 | | - |
| 16 | +import importlib |
| 17 | + |
| 18 | +# ----------------------------------------------------------------------------- |
| 19 | +# CRITICAL: Low-level monkey patching to resolve proxy issues |
| 20 | +# ----------------------------------------------------------------------------- |
| 21 | +# The issue we're encountering is that httpx is picking up proxy settings from somewhere, |
| 22 | +# and they're being injected into the Groq client initialization. |
| 23 | +# We need to patch httpx before any Groq imports happen. |
| 24 | + |
# Dedicated logger for messages emitted while this module is being imported.
# It is configured here, at import time, because the proxy clean-up and the
# httpx patch further down log through it as they run.
startup_logger = logging.getLogger("server.links.groq_whisper.startup")
startup_logger.setLevel(logging.INFO)
handler = logging.StreamHandler()
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
handler.setFormatter(formatter)
# logging.getLogger() returns the same logger object for the same name, so
# running this module body again (e.g. importlib.reload) would otherwise attach
# one more StreamHandler each time and duplicate every startup message.
if not startup_logger.handlers:
    startup_logger.addHandler(handler)
| 32 | + |
# Remove proxy configuration from this process's environment, logging the name
# (never the value) of each variable that was set. Both upper- and lower-case
# spellings are listed; membership is re-checked on every iteration so that a
# name already removed under its other spelling is simply skipped.
proxy_env_vars = ['HTTP_PROXY', 'HTTPS_PROXY', 'NO_PROXY', 'http_proxy', 'https_proxy', 'no_proxy']
for var in proxy_env_vars:
    if var not in os.environ:
        continue
    startup_logger.warning(f"Unsetting proxy environment variable: {var}")
    os.environ.pop(var)
| 39 | + |
# Replace httpx.Client with a subclass that drops proxy keyword arguments.
# This runs before the Groq import below; the intent is that clients created
# by the SDK are built from the patched class.
# The replacement is process-wide: any code that looks up httpx.Client after
# this point receives PatchedClient as well.
# NOTE(review): confirm whether the underlying failure is httpx rejecting an
# unsupported `proxies` keyword rather than proxy settings leaking in.
try:
    import httpx

    # Resolve the genuine httpx.Client even when this module has already
    # patched it (e.g. after importlib.reload). Without this, every re-run
    # would subclass the previous PatchedClient and stack wrappers.
    OriginalClient = getattr(httpx.Client, "_unpatched_client", httpx.Client)

    class PatchedClient(OriginalClient):
        """httpx.Client subclass that discards ``proxies``/``proxy`` keyword arguments."""

        # The class that was wrapped; read back above when patching again.
        _unpatched_client = OriginalClient

        def __init__(self, *args, **kwargs):
            """Drop proxy-related keyword arguments, then defer to the wrapped class."""
            for key in ('proxies', 'proxy'):
                if key in kwargs:
                    startup_logger.warning(f"Removing '{key}' from httpx.Client initialization")
                    del kwargs[key]
            # Positional arguments and all remaining keywords pass through untouched.
            super().__init__(*args, **kwargs)

    httpx.Client = PatchedClient
    startup_logger.info("Successfully patched httpx.Client to ignore proxy settings")

except ImportError:
    # httpx is not installed: nothing to patch, continue with the import.
    startup_logger.warning("Could not import httpx for patching")
except Exception as e:
    # Best effort: a failed patch is reported but must not stop the module import.
    startup_logger.error(f"Failed to patch httpx: {e}")
| 66 | + |
| 67 | +# Now we can safely import the rest of the dependencies |
16 | 68 | import requests |
17 | 69 | from tenacity import ( |
18 | 70 | RetryError, |
|
21 | 73 | stop_after_attempt, |
22 | 74 | wait_exponential, |
23 | 75 | ) |
| 76 | + |
| 77 | +# Import Groq client - should now be safe with patched httpx |
24 | 78 | from groq import Groq |
25 | 79 |
|
26 | 80 | from lib.error_tracking import init_error_tracker |
@@ -123,20 +177,78 @@ def transcribe_groq_whisper(dialog: dict, opts: dict) -> Union[Dict[str, Any], A |
123 | 177 | temp_file.write(content) |
124 | 178 | temp_file.flush() |
125 | 179 |
|
126 | | - # Initialize Groq client with the API key |
127 | | - client = Groq(api_key=opts['API_KEY']) |
| 180 | + # Initialize Groq client with API key |
| 181 | + api_key = opts['API_KEY'] |
| 182 | + client = Groq(api_key=api_key) |
128 | 183 |
|
129 | | - # Open the audio file for the API request |
130 | | - with open(temp_file.name, 'rb') as audio_file: |
131 | | - # Make the transcription request using the Groq client |
132 | | - response = client.audio.transcriptions.create( |
133 | | - file=audio_file, |
134 | | - model="distil-whisper-large-v3-en", |
135 | | - response_format="verbose_json" |
136 | | - ) |
| 184 | + # Log client initialization |
| 185 | + logger.info(f"Initialized Groq client with version: {getattr(client, '__version__', 'unknown')}") |
| 186 | + |
| 187 | + # Get file name for the API request |
| 188 | + file_name = temp_file.name |
| 189 | + logger.debug(f"Using temporary file: {file_name}") |
| 190 | + |
| 191 | + # Log available client attributes to help debugging |
| 192 | + logger.debug(f"Groq client attributes: {dir(client)}") |
| 193 | + |
| 194 | + # Check for audio transcription capabilities |
| 195 | + if hasattr(client, 'audio') and hasattr(client.audio, 'transcriptions'): |
| 196 | + logger.info("Using client.audio.transcriptions API") |
| 197 | + # Open the audio file for the API request |
| 198 | + with open(file_name, 'rb') as audio_file: |
| 199 | + # Make the transcription request using the Groq client |
| 200 | + response = client.audio.transcriptions.create( |
| 201 | + file=(file_name, audio_file.read()), |
| 202 | + model="whisper-large-v3-turbo", # Updated model name |
| 203 | + response_format="json" |
| 204 | + ) |
| 205 | + |
| 206 | + # Return the response |
| 207 | + return response |
| 208 | + elif hasattr(client, 'transcriptions'): |
| 209 | + logger.info("Using client.transcriptions API") |
| 210 | + # Alternative API structure |
| 211 | + with open(file_name, 'rb') as audio_file: |
| 212 | + response = client.transcriptions.create( |
| 213 | + file=(file_name, audio_file.read()), |
| 214 | + model="whisper-large-v3-turbo", |
| 215 | + response_format="json" |
| 216 | + ) |
| 217 | + return response |
| 218 | + else: |
| 219 | + # Fallback for older API versions |
| 220 | + logger.warning("Could not find audio transcription API in Groq client. Using audio request directly.") |
| 221 | + |
| 222 | + # Create custom request to the Groq API endpoint directly |
| 223 | + url = "https://api.groq.com/openai/v1/audio/transcriptions" |
| 224 | + headers = { |
| 225 | + "Authorization": f"Bearer {api_key}" |
| 226 | + } |
| 227 | + |
| 228 | + with open(file_name, 'rb') as audio_file: |
| 229 | + files = { |
| 230 | + "file": (file_name, audio_file, "audio/flac") |
| 231 | + } |
| 232 | + data = { |
| 233 | + "model": "whisper-large-v3-turbo", |
| 234 | + "response_format": "json" |
| 235 | + } |
| 236 | + |
| 237 | + response = requests.post(url, headers=headers, files=files, data=data) |
| 238 | + response.raise_for_status() # Raise exception for HTTP errors |
| 239 | + |
| 240 | + # Parse the JSON response |
| 241 | + result = response.json() |
| 242 | + |
| 243 | + # Create a simple object with text attribute to match API |
| 244 | + class TranscriptionResult: |
| 245 | + def __init__(self, text): |
| 246 | + self.text = text |
| 247 | + |
| 248 | + return TranscriptionResult(result.get("text", "")) |
137 | 249 |
|
138 | | - # Return the response (could be a dict or an object depending on Groq library version) |
139 | | - return response |
| 250 | + # Return the response (could be a dict or an object depending on Groq library version) |
| 251 | + return response |
140 | 252 |
|
141 | 253 |
|
142 | 254 | def run( |
@@ -167,6 +279,23 @@ def run( |
167 | 279 | opts = merged_opts |
168 | 280 |
|
169 | 281 | logger.info("Starting whisper plugin for vCon: %s", vcon_uuid) |
| 282 | + |
| 283 | + # Add enhanced logging for debugging |
| 284 | + logger.debug(f"Python version: {sys.version}") |
| 285 | + logger.debug(f"Environment: {[(k, v) for k, v in os.environ.items() if 'proxy' in k.lower()]}") |
| 286 | + |
| 287 | + # Log versions of key dependencies if available |
| 288 | + try: |
| 289 | + import groq |
| 290 | + logger.info(f"Groq version: {getattr(groq, '__version__', 'unknown')}") |
| 291 | + except ImportError: |
| 292 | + logger.warning("Groq package not available for version checking") |
| 293 | + |
| 294 | + try: |
| 295 | + import httpx |
| 296 | + logger.info(f"httpx version: {httpx.__version__}") |
| 297 | + except ImportError: |
| 298 | + logger.debug("httpx not available for version checking") |
170 | 299 |
|
171 | 300 | vcon_redis = VconRedis() |
172 | 301 | vCon = vcon_redis.get_vcon(vcon_uuid) |
@@ -221,18 +350,48 @@ def run( |
221 | 350 | break |
222 | 351 |
|
223 | 352 | logger.info("Transcribed vCon: %s", vCon.uuid) |
224 | | - logger.info(result) |
| 353 | + logger.info(f"Transcription result type: {type(result)}") |
| 354 | + logger.info(f"Transcription result attributes: {dir(result)}") |
| 355 | + |
| 356 | + # Check if result is a successful transcription |
| 357 | + if not hasattr(result, 'text'): |
| 358 | + logger.warning(f"Unexpected result format: {result}") |
| 359 | + stats_count("conserver.link.groq_whisper.transcription_failures") |
| 360 | + break |
225 | 361 |
|
226 | 362 | # Handle different response formats from the Groq API |
227 | 363 | # The result could be a dict, an object with model_dump method, or something else |
228 | | - transcription_data = result |
229 | | - if hasattr(result, 'model_dump'): |
230 | | - transcription_data = result.model_dump() |
231 | | - elif not isinstance(result, dict): |
232 | | - transcription_data = { |
233 | | - "text": str(result), |
234 | | - "raw_response": str(result) |
235 | | - } |
| 364 | + try: |
| 365 | + # First log the raw text |
| 366 | + logger.info(f"Transcription text: {result.text}") |
| 367 | + |
| 368 | + # Try to convert to a standard format |
| 369 | + transcription_data = None |
| 370 | + if hasattr(result, 'model_dump'): |
| 371 | + # For pydantic models |
| 372 | + transcription_data = result.model_dump() |
| 373 | + elif hasattr(result, '__dict__'): |
| 374 | + # For custom objects with __dict__ |
| 375 | + transcription_data = vars(result) |
| 376 | + elif isinstance(result, dict): |
| 377 | + # Already a dict |
| 378 | + transcription_data = result |
| 379 | + else: |
| 380 | + # Fallback to a simple dict with text |
| 381 | + transcription_data = { |
| 382 | + "text": str(result.text), |
| 383 | + "raw_response": str(result) |
| 384 | + } |
| 385 | + |
| 386 | + # Ensure text is included |
| 387 | + if "text" not in transcription_data and hasattr(result, 'text'): |
| 388 | + transcription_data["text"] = result.text |
| 389 | + |
| 390 | + logger.info(f"Processed transcription data: {transcription_data}") |
| 391 | + except Exception as e: |
| 392 | + logger.error(f"Error processing transcription result: {e}") |
| 393 | + # Fallback to a very simple format |
| 394 | + transcription_data = {"text": str(getattr(result, 'text', result))} |
236 | 395 |
|
237 | 396 | # Prepare vendor schema without sensitive data |
238 | 397 | vendor_schema = { |
|
0 commit comments