Skip to content

Commit c60b67c

Browse files
committed
feat: Make telemetry resilient to failures
- Wrapped all telemetry calls in try-except blocks
- Added logging when telemetry fails
- Ensured validation/publishing operations continue even when telemetry fails
- Added logger instances to validator.py, publisher.py, and engine/integration.py

This prevents telemetry backend failures (network issues, database errors, etc.) from aborting critical operations like validation and publishing.
1 parent a22568b commit c60b67c

File tree

3 files changed

+64
-40
lines changed

3 files changed

+64
-40
lines changed

xml_lib/engine/integration.py

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import hashlib
44
import json
5+
import logging
56
from dataclasses import dataclass, field
67
from datetime import datetime
78
from pathlib import Path
@@ -11,6 +12,8 @@
1112

1213
from xml_lib.engine.proofs import GuardrailProof, ProofResult
1314

15+
logger = logging.getLogger(__name__)
16+
1417

1518
@dataclass
1619
class EngineMetrics:
@@ -163,14 +166,17 @@ def integrate_with_telemetry(
163166
if not hasattr(telemetry_sink, "log_event"):
164167
return
165168

166-
telemetry_sink.log_event(
167-
"engine_proof_verification",
168-
total_obligations=proof_result.summary.get("total_obligations", 0),
169-
verified=proof_result.summary.get("verified", 0),
170-
failed=proof_result.summary.get("failed", 0),
171-
success_rate=proof_result.summary.get("success_rate", 0.0),
172-
all_verified=proof_result.all_verified(),
173-
)
169+
try:
170+
telemetry_sink.log_event(
171+
"engine_proof_verification",
172+
total_obligations=proof_result.summary.get("total_obligations", 0),
173+
verified=proof_result.summary.get("verified", 0),
174+
failed=proof_result.summary.get("failed", 0),
175+
success_rate=proof_result.summary.get("success_rate", 0.0),
176+
all_verified=proof_result.all_verified(),
177+
)
178+
except Exception as e:
179+
logger.warning(f"Telemetry logging failed: {e}")
174180

175181
def create_proof_artifact(
176182
self,

xml_lib/publisher.py

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
"""XSLT 3.0 publisher for HTML rendering."""
22

3+
import logging
34
import tempfile
45
from dataclasses import dataclass, field
56
from datetime import datetime
@@ -10,6 +11,8 @@
1011
from xml_lib.sanitize import MathPolicy, Sanitizer
1112
from xml_lib.telemetry import TelemetrySink
1213

14+
logger = logging.getLogger(__name__)
15+
1316

1417
@dataclass
1518
class PublishResult:
@@ -299,15 +302,18 @@ def publish(
299302
result.success = False
300303
result.error = str(e)
301304

302-
# Log telemetry
305+
# Log telemetry (failures don't abort publishing)
303306
duration = (datetime.now() - start_time).total_seconds()
304307
if self.telemetry:
305-
self.telemetry.log_publish(
306-
project=str(project_path),
307-
success=result.success,
308-
duration=duration,
309-
output_files=len(result.files),
310-
)
308+
try:
309+
self.telemetry.log_publish(
310+
project=str(project_path),
311+
success=result.success,
312+
duration=duration,
313+
output_files=len(result.files),
314+
)
315+
except Exception as e:
316+
logger.warning(f"Telemetry logging failed: {e}")
311317

312318
return result
313319

xml_lib/validator.py

Lines changed: 37 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import hashlib
44
import io
5+
import logging
56
import sys
67
from dataclasses import dataclass, field
78
from datetime import datetime
@@ -15,6 +16,8 @@
1516
from xml_lib.telemetry import TelemetrySink
1617
from xml_lib.types import ValidationError, ValidationResult
1718

19+
logger = logging.getLogger(__name__)
20+
1821

1922
class ProgressReporter:
2023
"""Progress reporter for large XML file validation."""
@@ -261,17 +264,20 @@ def validate_project(
261264
if progress:
262265
progress.update("Running guardrail checks")
263266

264-
# Log telemetry
267+
# Log telemetry (failures don't abort validation)
265268
duration = (datetime.now() - start_time).total_seconds()
266269
if self.telemetry:
267-
self.telemetry.log_validation(
268-
project=str(project_path),
269-
success=result.is_valid,
270-
duration=duration,
271-
file_count=len(result.validated_files),
272-
error_count=len(result.errors),
273-
warning_count=len(result.warnings),
274-
)
270+
try:
271+
self.telemetry.log_validation(
272+
project=str(project_path),
273+
success=result.is_valid,
274+
duration=duration,
275+
file_count=len(result.validated_files),
276+
error_count=len(result.errors),
277+
warning_count=len(result.warnings),
278+
)
279+
except Exception as e:
280+
logger.warning(f"Telemetry logging failed: {e}")
275281

276282
# Complete progress
277283
if progress:
@@ -317,14 +323,17 @@ def _validate_file(self, path: Path) -> None:
317323
)
318324
duration = (datetime.now() - start_time).total_seconds()
319325
if self.telemetry:
320-
self.telemetry.log_validation(
321-
project=str(path),
322-
success=True,
323-
duration=duration,
324-
file_count=0,
325-
error_count=0,
326-
warning_count=len(result.warnings),
327-
)
326+
try:
327+
self.telemetry.log_validation(
328+
project=str(path),
329+
success=True,
330+
duration=duration,
331+
file_count=0,
332+
error_count=0,
333+
warning_count=len(result.warnings),
334+
)
335+
except Exception as e:
336+
logger.warning(f"Telemetry logging failed: {e}")
328337
self._last_result = result
329338
return
330339
if policy == MathPolicy.SANITIZE and sanitizer:
@@ -380,14 +389,17 @@ def _validate_file(self, path: Path) -> None:
380389

381390
duration = (datetime.now() - start_time).total_seconds()
382391
if self.telemetry:
383-
self.telemetry.log_validation(
384-
project=str(path),
385-
success=result.is_valid,
386-
duration=duration,
387-
file_count=len(result.validated_files),
388-
error_count=len(result.errors),
389-
warning_count=len(result.warnings),
390-
)
392+
try:
393+
self.telemetry.log_validation(
394+
project=str(path),
395+
success=result.is_valid,
396+
duration=duration,
397+
file_count=len(result.validated_files),
398+
error_count=len(result.errors),
399+
warning_count=len(result.warnings),
400+
)
401+
except Exception as e:
402+
logger.warning(f"Telemetry logging failed: {e}")
391403

392404
self._last_result = result
393405

0 commit comments

Comments
 (0)