Skip to content

Commit de2200c

Browse files
Copilot and Mte90 authored
Print curl command on embedding timeout with DEBUG mode and proper thread cleanup (#11)
Co-authored-by: Mte90 <[email protected]> Co-authored-by: copilot-swe-agent[bot] <[email protected]>
1 parent b73862a commit de2200c

File tree

6 files changed

+144
-4
lines changed

6 files changed

+144
-4
lines changed

.env.example

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,3 +40,7 @@ FILE_WATCHER_INTERVAL=10
4040

4141
# Debounce time in seconds before processing detected changes (default: 5, minimum: 1)
4242
FILE_WATCHER_DEBOUNCE=5
43+
44+
# Debug configuration
45+
# Enable debug mode: when an embedding request times out, save a bash script containing the equivalent curl command so the request can be reproduced (default: false)
46+
DEBUG=false

ai/openai.py

Lines changed: 94 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,68 @@ def __init__(self,
136136
self.session.headers.update({"Authorization": f"Bearer {self.api_key}"})
137137
self.session.headers.update({"Content-Type": "application/json"})
138138

139+
def _generate_curl_command(self, url: str, headers: Dict[str, str], payload: Dict[str, Any]) -> str:
    """
    Generate a copy-pasteable curl command for debugging purposes.

    The Authorization header value is replaced with a placeholder so the
    API key never reaches logs, the console, or a saved debug script.
    Every argument (URL, headers, payload) is wrapped in single quotes
    with embedded single quotes escaped, so the result stays a single
    well-formed command when pasted into a POSIX shell.

    Args:
        url: Endpoint the request was sent to.
        headers: Request headers; a non-empty Authorization value is masked.
        payload: JSON-serializable request body.

    Returns:
        The curl invocation with one argument per line, joined by
        backslash line continuations.
    """

    def _sq(text: str) -> str:
        # Close the quote, emit an escaped quote, reopen it:  '  ->  '\''
        return "'" + str(text).replace("'", "'\\''") + "'"

    # Start with basic curl command
    curl_parts = ["curl", "-X", "POST", _sq(url)]

    # Add headers
    for key, value in headers.items():
        if key.lower() == "authorization" and value:
            # Keep the "Bearer " scheme visible, but never the credential.
            if value.startswith("Bearer "):
                value = "Bearer <API_KEY_MASKED>"
            else:
                value = "<API_KEY_MASKED>"
        curl_parts.append("-H " + _sq(f"{key}: {value}"))

    # Add data payload
    curl_parts.append("-d " + _sq(json.dumps(payload)))

    return " \\\n ".join(curl_parts)
166+
167+
def _save_curl_script(self, curl_command: str, request_id: str, file_path: str, chunk_index: int) -> Optional[str]:
    """
    Save a curl command to a bash script in the temp directory for debugging.

    The file is created with ``tempfile.mkstemp`` so its name is unique and
    an existing file or symlink is never overwritten. The script is made
    readable and executable by the owner only, because the curl command
    embeds the full request payload.

    Args:
        curl_command: Shell command to place in the script body.
        request_id: Request identifier; its first 8 characters (restricted
            to alphanumerics, "-" and "_") become part of the file name.
        file_path: Source file the embedding request was for (header only).
        chunk_index: Chunk number within that file (header only).

    Returns:
        The path to the generated script, or None if saving failed.
    """
    import tempfile

    def _one_line(value) -> str:
        # A line break inside a "# ..." header would end the comment and
        # turn the remainder into an executable line of the script.
        return " ".join(str(value).splitlines())

    script_path = None
    try:
        # Only filename-safe characters may reach the mkstemp prefix.
        safe_id = "".join(ch for ch in request_id[:8] if ch.isalnum() or ch in "-_")
        fd, script_path = tempfile.mkstemp(
            prefix=f"embedding_debug_{safe_id}_",
            suffix=".sh",
            text=True,
        )

        # Generate script content with shebang and comments
        script_content = (
            "#!/bin/bash\n"
            "# Embedding request debug script\n"
            f"# Request ID: {_one_line(request_id)}\n"
            f"# File: {_one_line(file_path)}\n"
            f"# Chunk: {_one_line(chunk_index)}\n"
            f"# Generated: {time.strftime('%Y-%m-%d %H:%M:%S')}\n"
            "\n"
            f"{curl_command}\n"
        )

        with os.fdopen(fd, "w") as f:
            f.write(script_content)

        # Make the script executable, but keep it private to the owner.
        os.chmod(script_path, 0o700)

        return script_path
    except Exception as e:
        # Best effort: a failure to write the debug aid must never mask the
        # timeout that is being reported by the caller.
        _embedding_logger.warning(f"Failed to save curl debug script: {e}")
        if script_path:
            try:
                os.remove(script_path)
            except OSError:
                pass
        return None
199+
200+
139201
def _log_request_start(self, request_id: str, file_path: str, chunk_index: int, chunk_len: int):
140202
_embedding_logger.debug(
141203
"Embedding request START",
@@ -244,7 +306,38 @@ def embed_text(self, text: str, file_path: str = "<unknown>", chunk_index: int =
244306
except requests.Timeout as e:
245307
elapsed = time.perf_counter() - start
246308
err_msg = f"Timeout after {elapsed:.2f}s: {e}"
247-
_embedding_logger.error("Embedding API Timeout", extra={"request_id": request_id, "error": str(e)})
309+
310+
# Generate curl command for debugging
311+
curl_command = self._generate_curl_command(self.api_url, dict(self.session.headers), payload)
312+
313+
# Save to bash script in /tmp if DEBUG is enabled
314+
script_path = None
315+
if CFG.get("debug"):
316+
script_path = self._save_curl_script(curl_command, request_id, file_path, chunk_index)
317+
318+
_embedding_logger.error(
319+
"Embedding API Timeout",
320+
extra={
321+
"request_id": request_id,
322+
"error": str(e),
323+
"elapsed_s": elapsed,
324+
"curl_command": curl_command,
325+
"debug_script": script_path
326+
}
327+
)
328+
329+
# Print to console for easy debugging
330+
print(f"\n{'='*80}")
331+
print(f"Embedding request timed out after {elapsed:.2f}s")
332+
print(f"Request ID: {request_id}")
333+
print(f"File: {file_path}, Chunk: {chunk_index}")
334+
if script_path:
335+
print(f"\nDebug script saved to: {script_path}")
336+
print(f"Run with: bash {script_path}")
337+
else:
338+
print(f"\nDebug with this curl command:")
339+
print(curl_command)
340+
print(f"{'='*80}\n")
248341
except requests.RequestException as e:
249342
elapsed = time.perf_counter() - start
250343
err_msg = f"RequestException after {elapsed:.2f}s: {e}\n{traceback.format_exc()}"

db/db_writer.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ def __init__(self, database_path, timeout_seconds=30):
2222
self.database_path = database_path
2323
self._q = queue.Queue()
2424
self._stop = threading.Event()
25-
self._thread = threading.Thread(target=self._worker, daemon=True, name=f"DBWriter-{database_path}")
25+
self._thread = threading.Thread(target=self._worker, daemon=False, name=f"DBWriter-{database_path}")
2626
self._timeout_seconds = timeout_seconds
2727
self._thread.start()
2828
_LOG.info(f"DBWriter started for database: {database_path}")

main.py

Lines changed: 41 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,12 @@
99
import sys
1010
import tempfile
1111
import uvicorn
12+
import signal
13+
import atexit
1214

1315
from db import operations as db_operations
1416
from db.operations import get_or_create_project
17+
from db.db_writer import stop_all_writers
1518
from utils.config import CFG
1619
from utils.logger import get_logger
1720
from endpoints.project_endpoints import router as project_router
@@ -25,6 +28,42 @@
2528
_file_watcher = None
2629

2730

31+
def cleanup_on_exit():
    """Release background resources; safe to call more than once.

    This function is registered with ``atexit`` and is also called directly
    by ``signal_handler`` right before it calls ``sys.exit()``, so a
    signal-driven shutdown reaches it twice. Only the first call does any
    work; later calls return immediately.
    """
    global _file_watcher

    # signal_handler -> sys.exit() -> atexit would otherwise repeat the whole
    # teardown, and a second Ctrl+C could re-enter it while it is running.
    if getattr(cleanup_on_exit, "_started", False):
        return
    cleanup_on_exit._started = True

    logger.info("Cleaning up resources...")

    # Stop FileWatcher
    if _file_watcher:
        try:
            _file_watcher.stop(timeout=2.0)
            _file_watcher = None
            logger.info("FileWatcher stopped")
        except Exception as e:
            # Keep going: the database writers must still be stopped.
            logger.error(f"Error stopping FileWatcher: {e}")

    # Stop all database writers
    try:
        stop_all_writers()
        logger.info("Database writers stopped")
    except Exception as e:
        logger.error(f"Error stopping database writers: {e}")
52+
53+
54+
def signal_handler(signum, frame):
    """Log the received signal, release resources, then exit with status 0.

    Args:
        signum: Number of the signal that was delivered.
        frame: Interrupted stack frame (unused).
    """
    notice = f"Received signal {signum}, shutting down..."
    logger.info(notice)
    cleanup_on_exit()
    # Same effect as sys.exit(0): unwinds the main thread via SystemExit.
    raise SystemExit(0)
59+
60+
61+
# Register cleanup handlers: one signal hook per termination signal, plus an
# atexit hook for ordinary interpreter exit.
# NOTE(review): atexit callbacks presumably run only after non-daemon threads
# have been joined -- confirm cleanup can still fire on a normal exit now that
# the watcher/writer threads are created with daemon=False.
for _signum in (signal.SIGINT, signal.SIGTERM):
    signal.signal(_signum, signal_handler)
atexit.register(cleanup_on_exit)
65+
66+
2867
@asynccontextmanager
2968
async def lifespan(app: FastAPI):
3069
"""Application lifespan handler."""
@@ -96,7 +135,8 @@ async def lifespan(app: FastAPI):
96135

97136
yield
98137

99-
# Stop FileWatcher on shutdown
138+
# Cleanup is handled by atexit and signal handlers
139+
# Just ensure FileWatcher stops gracefully here
100140
if _file_watcher:
101141
try:
102142
_file_watcher.stop()

utils/config.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,4 +42,7 @@ def _bool_env(name, default):
4242
"file_watcher_enabled": _bool_env("FILE_WATCHER_ENABLED", True),
4343
"file_watcher_interval": _int_env("FILE_WATCHER_INTERVAL", 10),
4444
"file_watcher_debounce": _int_env("FILE_WATCHER_DEBOUNCE", 5),
45+
46+
# Debug configuration
47+
"debug": _bool_env("DEBUG", False),
4548
}

utils/file_watcher.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,7 @@ def start(self) -> None:
123123
self._thread = threading.Thread(
124124
target=self._watch_loop,
125125
name="FileWatcher",
126-
daemon=True
126+
daemon=False
127127
)
128128
self._thread.start()
129129
self.logger.info("FileWatcher started")

0 commit comments

Comments
 (0)