|
1 | | -from datetime import datetime |
2 | 1 | from enum import Enum |
3 | 2 | from typing import Any |
4 | 3 |
|
|
7 | 6 |
|
8 | 7 | from ee.hogai.session_summaries import SummaryValidationError |
9 | 8 | from ee.hogai.session_summaries.constants import HALLUCINATED_EVENTS_MIN_RATIO |
10 | | -from ee.hogai.session_summaries.utils import get_column_index, prepare_datetime, unpack_full_event_id |
| 9 | +from ee.hogai.session_summaries.utils import ( |
| 10 | + calculate_time_since_start, |
| 11 | + get_column_index, |
| 12 | + prepare_datetime, |
| 13 | + unpack_full_event_id, |
| 14 | +) |
11 | 15 | from ee.hogai.utils.yaml import load_yaml_from_raw_llm_content |
12 | 16 |
|
13 | 17 | logger = structlog.get_logger(__name__) |
@@ -274,7 +278,8 @@ def _remove_hallucinated_events( |
274 | 278 | f"Too many hallucinated events ({len(hallucinated_events)}/{total_summary_events}) for session id ({session_id})" |
275 | 279 | f"in the raw session summary: {[x[-1] for x in hallucinated_events]} " |
276 | 280 | ) |
277 | | - logger.error(msg, session_id=session_id, signals_type="session-summaries") |
| 281 | + if final_validation: |
| 282 | + logger.error(msg, session_id=session_id, signals_type="session-summaries") |
278 | 283 | raise SummaryValidationError(msg) |
279 | 284 | # Reverse to not break indexes |
280 | 285 | for group_index, event_index, event in reversed(hallucinated_events): |
@@ -362,14 +367,6 @@ def load_raw_session_summary_from_llm_content( |
362 | 367 | return raw_session_summary |
363 | 368 |
|
364 | 369 |
|
365 | | -# TODO Rework the logic, so events before the recording are marked as "LOAD", not 00:00 |
366 | | -def calculate_time_since_start(session_timestamp: str, session_start_time: datetime | None) -> int | None: |
367 | | - if not session_start_time or not session_timestamp: |
368 | | - return None |
369 | | - timestamp_datetime = datetime.fromisoformat(session_timestamp) |
370 | | - return max(0, int((timestamp_datetime - session_start_time).total_seconds() * 1000)) |
371 | | - |
372 | | - |
373 | 370 | def _validate_enriched_summary( |
374 | 371 | data: dict[str, Any], session_id: str, final_validation: bool |
375 | 372 | ) -> SessionSummarySerializer: |
@@ -454,6 +451,7 @@ def _calculate_segment_meta( |
454 | 451 | raw_key_actions: list[dict[str, Any]] | None, |
455 | 452 | session_duration: int, |
456 | 453 | session_id: str, |
| 454 | + final_validation: bool, |
457 | 455 | ) -> SegmentMetaSerializer: |
458 | 456 | # Find first and the last event in the segment |
459 | 457 | segment_index = raw_segment.get("index") |
@@ -599,11 +597,12 @@ def _calculate_segment_meta( |
599 | 597 | # TODO: Factor of two is arbitrary, find a better solution |
600 | 598 | if duration <= 0 or fallback_duration // duration > 2: |
601 | 599 | # Checking only duration as events are sorted chronologically |
602 | | - logger.warning( |
603 | | - f"Duration change is drastic (fallback: {fallback_duration} -> segments: {duration}) - using fallback data for session_id {session_id}", |
604 | | - session_id=session_id, |
605 | | - signals_type="session-summaries", |
606 | | - ) |
| 600 | + if final_validation: |
| 601 | + logger.warning( |
| 602 | + f"Duration change is drastic (fallback: {fallback_duration} -> segments: {duration}) - using fallback data for session_id {session_id}", |
| 603 | + session_id=session_id, |
| 604 | + signals_type="session-summaries", |
| 605 | + ) |
607 | 606 | segment_meta_data["duration"] = fallback_duration |
608 | 607 | segment_meta_data["duration_percentage"] = fallback_duration_percentage |
609 | 608 | segment_meta_data["events_count"] = fallback_events_count |
@@ -657,12 +656,14 @@ def enrich_raw_session_summary_with_meta( |
657 | 656 | simplified_events_mapping=simplified_events_mapping, |
658 | 657 | raw_key_actions=raw_key_actions, |
659 | 658 | session_id=session_id, |
| 659 | + final_validation=final_validation, |
660 | 660 | ) |
661 | 661 | # Validate the serializer to be able to use `.data` |
662 | 662 | if not segment_meta.is_valid(): |
663 | 663 | # Most of the fields are optional, so failed validation should be reported |
664 | 664 | msg = f"Error validating segment meta against the schema when summarizing session_id {session_id}: {segment_meta.errors}" |
665 | | - logger.error(msg, session_id=session_id, signals_type="session-summaries") |
| 665 | + if final_validation: |
| 666 | + logger.error(msg, session_id=session_id, signals_type="session-summaries") |
666 | 667 | raise SummaryValidationError(msg) |
667 | 668 | enriched_segment["meta"] = segment_meta.data |
668 | 669 | enriched_segments.append(enriched_segment) |
|
0 commit comments