diff --git a/scripts/validate_session_json.py b/scripts/validate_session_json.py index 7068e5b96..b70cdbbb1 100644 --- a/scripts/validate_session_json.py +++ b/scripts/validate_session_json.py @@ -39,25 +39,34 @@ # Commit SHA pattern COMMIT_SHA_PATTERN = re.compile(r"^[a-f0-9]{7,40}$") -# Session start MUST items -SESSION_START_MUST_ITEMS = frozenset({ - "serenaActivated", - "serenaInstructions", - "handoffRead", - "sessionLogCreated", - "branchVerified", - "notOnMain", -}) - -# Session end MUST items -SESSION_END_MUST_ITEMS = frozenset({ - "checklistComplete", - "handoffNotUpdated", - "serenaMemoryUpdated", - "markdownLintRun", - "changesCommitted", - "validationPassed", -}) +# Minimum required session start items (must exist in every session log) +SESSION_START_REQUIRED_ITEMS = frozenset( + { + "serenaActivated", + "serenaInstructions", + "handoffRead", + "sessionLogCreated", + "branchVerified", + "notOnMain", + } +) + +# Minimum required session end items (must exist in every session log) +SESSION_END_REQUIRED_ITEMS = frozenset( + { + "checklistComplete", + "handoffNotUpdated", + "serenaMemoryUpdated", + "markdownLintRun", + "changesCommitted", + "validationPassed", + } +) + +# Evidence patterns that contradict a "complete: true" claim +CONTRADICTION_PATTERNS = re.compile( + r"(?i)\b(not available|skipped|N/A|deferred|will validate|will run|TODO|pending|TBD)\b" +) def get_case_insensitive(data: dict[str, Any], key: str) -> Any | None: @@ -139,6 +148,45 @@ def validate_must_item( if level == "MUST" and is_complete and not evidence: result.warnings.append(f"Missing evidence: {section_name}.{item_name}") + if level == "MUST" and is_complete and evidence and isinstance(evidence, str): + if CONTRADICTION_PATTERNS.search(evidence): + result.warnings.append( + f"Evidence contradiction: {section_name}.{item_name} " + f"is complete but evidence suggests otherwise: {evidence!r}" + ) + + +def validate_checklist_section( + section_data: dict[str, Any], + required_items: frozenset[str], + section_name: str, + result: ValidationResult, +) -> None: + """Validate all MUST items in a checklist section. + + Checks both the minimum required items and any additional items + in the section that declare level == "MUST". + + Args: + section_data: The section data (e.g. sessionStart or sessionEnd). + required_items: Minimum items that must exist in the section. + section_name: Section name for error messages. + result: ValidationResult to update with errors/warnings. + """ + # Collect all items to validate: required items + any item with level MUST + items_to_check: set[str] = set(required_items) + for item_name, item_data in section_data.items(): + if isinstance(item_data, dict): + level = get_case_insensitive(item_data, "level") + if level in ("MUST", "MUST NOT"): + items_to_check.add(item_name) + + for item_name in items_to_check: + if item_name in section_data: + validate_must_item(section_data[item_name], item_name, section_name, result) + else: + result.errors.append(f"Missing required item: {section_name}.{item_name}") + def validate_session_start(session_start: dict[str, Any], result: ValidationResult) -> None: """Validate the sessionStart section. @@ -147,9 +195,7 @@ def validate_session_start(session_start: dict[str, Any], result: ValidationResu session_start: The sessionStart section data. result: ValidationResult to update with errors/warnings. """ - for item in SESSION_START_MUST_ITEMS: - if item in session_start: - validate_must_item(session_start[item], item, "sessionStart", result) + validate_checklist_section(session_start, SESSION_START_REQUIRED_ITEMS, "sessionStart", result) def validate_session_end(session_end: dict[str, Any], result: ValidationResult) -> None: @@ -159,9 +205,7 @@ def validate_session_end(session_end: dict[str, Any], result: ValidationResult) session_end: The sessionEnd section data. result: ValidationResult to update with errors/warnings. """ - for item in SESSION_END_MUST_ITEMS: - if item in session_end: - validate_must_item(session_end[item], item, "sessionEnd", result) + validate_checklist_section(session_end, SESSION_END_REQUIRED_ITEMS, "sessionEnd", result) # MUST NOT check: handoffNotUpdated should NOT be complete (HANDOFF.md is read-only) if "handoffNotUpdated" in session_end: diff --git a/tests/test_validate_session_json.py b/tests/test_validate_session_json.py index 536430369..c69e63c75 100644 --- a/tests/test_validate_session_json.py +++ b/tests/test_validate_session_json.py @@ -18,11 +18,15 @@ from scripts.validate_session_json import ( BRANCH_PATTERN, COMMIT_SHA_PATTERN, + CONTRADICTION_PATTERNS, REQUIRED_SESSION_FIELDS, + SESSION_END_REQUIRED_ITEMS, + SESSION_START_REQUIRED_ITEMS, ValidationResult, get_case_insensitive, has_case_insensitive, load_session_file, + validate_checklist_section, validate_protocol_compliance, validate_session_end, validate_session_log, @@ -30,6 +34,28 @@ validate_session_start, ) + +def _make_complete_start_section(**overrides: dict) -> dict: + """Build a sessionStart section with all required items complete.""" + section = { + name: {"complete": True, "evidence": "Evidence", "level": "MUST"} + for name in SESSION_START_REQUIRED_ITEMS + } + section.update(overrides) + return section + + +def _make_complete_end_section(**overrides: dict) -> dict: + """Build a sessionEnd section with all required items complete.""" + section = { + name: {"complete": True, "evidence": "Evidence", "level": "MUST"} + for name in SESSION_END_REQUIRED_ITEMS + } + # handoffNotUpdated is MUST NOT: complete=False is the correct state + section["handoffNotUpdated"] = {"complete": False, "evidence": "Not updated", "level": "MUST NOT"} + section.update(overrides) + return section + if TYPE_CHECKING: from _pytest.capture import CaptureFixture @@ -218,9 +244,7 @@ class TestValidateSessionStart: def test_complete_must_items(self) -> None: """Complete MUST items pass validation.""" - session_start = { - "serenaActivated": {"complete": True, "evidence": "Evidence", "level": "MUST"}, - } + session_start = _make_complete_start_section() result = ValidationResult() validate_session_start(session_start, result) @@ -229,21 +253,21 @@ def test_complete_must_items(self) -> None: def test_incomplete_must_item(self) -> None: """Incomplete MUST item causes error.""" - session_start = { - "serenaActivated": {"complete": False, "evidence": "", "level": "MUST"}, - } + session_start = _make_complete_start_section( + serenaActivated={"complete": False, "evidence": "", "level": "MUST"}, + ) result = ValidationResult() validate_session_start(session_start, result) assert not result.is_valid - assert "Incomplete MUST" in result.errors[0] + assert any("Incomplete MUST" in e for e in result.errors) def test_missing_evidence_warning(self) -> None: """Missing evidence on complete MUST causes warning.""" - session_start = { - "serenaActivated": {"complete": True, "evidence": "", "level": "MUST"}, - } + session_start = _make_complete_start_section( + serenaActivated={"complete": True, "evidence": "", "level": "MUST"}, + ) result = ValidationResult() validate_session_start(session_start, result) @@ -258,9 +282,7 @@ class TestValidateSessionEnd: def test_valid_session_end(self) -> None: """Valid session end passes validation.""" - session_end = { - "checklistComplete": {"complete": True, "evidence": "Done", "level": "MUST"}, - } + session_end = _make_complete_end_section() result = ValidationResult() validate_session_end(session_end, result) @@ -269,9 +291,9 @@ def test_valid_session_end(self) -> None: def test_must_not_violation(self) -> None: """MUST NOT violation causes error.""" - session_end = { - "handoffNotUpdated": {"complete": True, "level": "MUST NOT"}, # Violated! - } + session_end = _make_complete_end_section( + handoffNotUpdated={"complete": True, "level": "MUST NOT"}, # Violated! + ) result = ValidationResult() validate_session_end(session_end, result) @@ -280,6 +302,137 @@ def test_must_not_violation(self) -> None: assert any("MUST NOT violated" in e for e in result.errors) +class TestChecklistSectionValidation: + """Tests for validate_checklist_section - the core fix for issue #1028.""" + + def test_unknown_must_item_incomplete_causes_error(self) -> None: + """MUST items NOT in the required set are still validated.""" + section_data = { + "usageMandatoryRead": {"complete": False, "evidence": "", "level": "MUST"}, + } + result = ValidationResult() + + validate_checklist_section( + section_data, frozenset(), "sessionStart", result + ) + + assert not result.is_valid + assert any("usageMandatoryRead" in e for e in result.errors) + + def test_unknown_must_item_complete_passes(self) -> None: + """Complete MUST items NOT in the required set pass validation.""" + section_data = { + "customMustItem": {"complete": True, "evidence": "Done", "level": "MUST"}, + } + result = ValidationResult() + + validate_checklist_section( + section_data, frozenset(), "sessionStart", result + ) + + assert result.is_valid + + def test_should_items_not_checked_as_must(self) -> None: + """SHOULD items that are incomplete do not cause errors.""" + section_data = { + "optionalItem": {"complete": False, "evidence": "", "level": "SHOULD"}, + } + result = ValidationResult() + + validate_checklist_section( + section_data, frozenset(), "sessionStart", result + ) + + assert result.is_valid + + def test_missing_required_item_causes_error(self) -> None: + """Required item absent from section_data causes an error.""" + section_data = { + "someOtherItem": {"complete": True, "evidence": "Done", "level": "MUST"}, + } + result = ValidationResult() + + validate_checklist_section( + section_data, frozenset({"requiredButMissing"}), "sessionStart", result + ) + + assert not result.is_valid + assert any("Missing required item: sessionStart.requiredButMissing" in e for e in result.errors) + + def test_non_dict_items_ignored(self) -> None: + """Non-dict values in section data are ignored.""" + section_data = { + "someString": "not a dict", + "someNumber": 42, + } + result = ValidationResult() + + validate_checklist_section( + section_data, frozenset(), "sessionStart", result + ) + + assert result.is_valid + + +class TestEvidenceContradiction: + """Tests for evidence-contradiction detection.""" + + @pytest.mark.parametrize( + "evidence", + [ + "not available", + "SKIPPED", + "N/A", + "Deferred to next session", + "will validate later", + "will run after merge", + "TODO", + "pending review", + "TBD", + ], + ) + def test_contradiction_detected(self, evidence: str) -> None: + """Evidence containing skip/waiver patterns triggers warning.""" + section_data = { + "serenaActivated": { + "complete": True, + "evidence": evidence, + "level": "MUST", + }, + } + result = ValidationResult() + + validate_checklist_section( + section_data, frozenset(), "sessionStart", result + ) + + assert any("Evidence contradiction" in w for w in result.warnings) + + def test_legitimate_evidence_no_contradiction(self) -> None: + """Legitimate evidence does not trigger contradiction warning.""" + section_data = { + "serenaActivated": { + "complete": True, + "evidence": "mcp__serena__activate_project output confirmed", + "level": "MUST", + }, + } + result = ValidationResult() + + validate_checklist_section( + section_data, frozenset(), "sessionStart", result + ) + + assert not any("Evidence contradiction" in w for w in result.warnings) + + def test_contradiction_pattern_matches_expected(self) -> None: + """CONTRADICTION_PATTERNS matches known skip indicators.""" + assert CONTRADICTION_PATTERNS.search("not available") + assert CONTRADICTION_PATTERNS.search("SKIPPED due to CI") + assert CONTRADICTION_PATTERNS.search("N/A for this session") + assert not CONTRADICTION_PATTERNS.search("Tool output confirmed") + + class TestValidateProtocolCompliance: """Tests for validate_protocol_compliance function.""" @@ -332,8 +485,8 @@ def test_valid_minimal_log(self) -> None: "objective": "Test", }, "protocolCompliance": { - "sessionStart": {}, - "sessionEnd": {}, + "sessionStart": _make_complete_start_section(), + "sessionEnd": _make_complete_end_section(), }, } @@ -420,8 +573,8 @@ def valid_session_file(self, tmp_path: Path) -> Path: "objective": "Test objective", }, "protocolCompliance": { - "sessionStart": {}, - "sessionEnd": {}, + "sessionStart": _make_complete_start_section(), + "sessionEnd": _make_complete_end_section(), }, } session_file = tmp_path / "valid-session.json" @@ -449,9 +602,7 @@ def test_main_valid_session( from scripts import validate_session_json # Allow temp directory paths for testing - monkeypatch.setattr( - validate_session_json, "_PROJECT_ROOT", valid_session_file.parent - ) + monkeypatch.setattr(validate_session_json, "_PROJECT_ROOT", valid_session_file.parent) monkeypatch.setattr( "sys.argv", ["validate_session_json.py", str(valid_session_file)], @@ -473,9 +624,7 @@ def test_main_invalid_session( from scripts import validate_session_json # Allow temp directory paths for testing - monkeypatch.setattr( - validate_session_json, "_PROJECT_ROOT", invalid_session_file.parent - ) + monkeypatch.setattr(validate_session_json, "_PROJECT_ROOT", invalid_session_file.parent) monkeypatch.setattr( "sys.argv", ["validate_session_json.py", str(invalid_session_file)], @@ -497,9 +646,7 @@ def test_main_pre_commit_mode( from scripts import validate_session_json # Allow temp directory paths for testing - monkeypatch.setattr( - validate_session_json, "_PROJECT_ROOT", invalid_session_file.parent - ) + monkeypatch.setattr(validate_session_json, "_PROJECT_ROOT", invalid_session_file.parent) monkeypatch.setattr( "sys.argv", ["validate_session_json.py", str(invalid_session_file), "--pre-commit"], @@ -622,8 +769,8 @@ def test_extra_fields_allowed(self) -> None: "extraField": "allowed", }, "protocolCompliance": { - "sessionStart": {}, - "sessionEnd": {}, + "sessionStart": _make_complete_start_section(), + "sessionEnd": _make_complete_end_section(), }, "workLog": [], "decisions": [],