Skip to content

Commit 39034ca

Browse files
feat: close architecture cockpit implementation gaps across extractors, pipeline, and MCP tools
Multi-language support: - Add test extraction for Java (JUnit), Go, PHP (PHPUnit), Ruby (RSpec/Minitest), C# (NUnit/xUnit) - Add signal collection (symbol_hints, call_sites, assertions) for PHP, Ruby, C# tests - Add AST-based UI route extraction for Java Spring, Python Flask/FastAPI, Go HTTP, PHP Laravel/Symfony, Ruby Rails, C# ASP.NET - Restore hybrid AST+LLM rule extraction: tree-sitter splits code units, LLM analyzes each concurrently via asyncio.gather, container names tracked Schema extraction: - Add deterministic parsers for Django models, SQLAlchemy models, Prisma schema - LLM fallback preserved for unsupported ORM formats Knowledge graph fixes: - Fix import edges silently dropped: resolve IMPORTS to file-level edges via batch symbol-to-file lookup (eliminates N+1 queries) - Fix orphan SYMBOL cleanup scoped to source URIs (was deleting other sources' nodes) - Fix _map_search_to_nodes natural_key mismatch: FILE nodes use raw URIs, not file: prefixed keys — graph_rag queries now find local context - Fix entity extraction: use pre-split Chunk table instead of whole documents, removing silent 8000-char truncation that discarded most of large files Architecture cockpit: - Make arc42 sections data-driven: sections 1,2,4,8,9 now pull real extracted data instead of static templates; section 10 computes quality from fact confidence when quality_summary unavailable - Enable arc42 generation on sync by default (arch_docs_generate_on_sync=true) - Fix partial sync skipping GraphRAG chain: steps 5-7 now check if semantic entities exist before skipping, preventing permanently empty graph on re-syncs Code quality: - Extract shared helpers to ast_utils.py: find_enclosing_class_name, ruby_first_string_arg, java_annotation_names, csharp_attribute_names - Fix flows.py duplicate symbol collection bug - Add GraphRAG truncation logging and failure diagnostics - Make MCP error messages actionable: research_validation, research_data_model, research_architecture, graph_rag, 
get_arc42, arc42_drift_report now explain possible causes and remediation steps instead of bare "not found"
1 parent 4c66a34 commit 39034ca

File tree

17 files changed

+2490
-168
lines changed

17 files changed

+2490
-168
lines changed

apps/api/app/mcp_server.py

Lines changed: 44 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1325,7 +1325,15 @@ async def research_validation(
13251325
matched_candidates = _match_candidates_by_query(all_candidates, query_words)
13261326

13271327
if not matched_rules and not matched_candidates:
1328-
return f"# No Validation Rules Found\n\nNo business rules or validation logic found for: `{code_path}`"
1328+
return (
1329+
f"# No Validation Rules Found\n\n"
1330+
f"No business rules or validation logic found for: `{code_path}`\n\n"
1331+
"**Possible causes:**\n"
1332+
"- The source hasn't been synced yet — trigger a sync to run extraction\n"
1333+
"- No LLM provider is configured (`DEFAULT_LLM_PROVIDER` env var) — "
1334+
"business rule extraction requires an LLM\n"
1335+
"- The code genuinely has no validation patterns for this path"
1336+
)
13291337

13301338
return _format_validation_results(code_path, matched_rules, matched_candidates)
13311339

@@ -1481,7 +1489,14 @@ async def research_data_model(
14811489
erd_artifact = result.scalar_one_or_none()
14821490

14831491
if not tables and not columns and not endpoints:
1484-
return f"# No Data Model Found\n\nNo tables, columns, or APIs found for: `{entity}`"
1492+
return (
1493+
f"# No Data Model Found\n\n"
1494+
f"No tables, columns, or APIs found for: `{entity}`\n\n"
1495+
"**Possible causes:**\n"
1496+
"- The source hasn't been synced yet — trigger a sync to run schema extraction\n"
1497+
"- No SQL/DDL, Django, SQLAlchemy, Prisma, or migration files were found\n"
1498+
"- Try a broader search term (e.g., a table name or entity name)"
1499+
)
14851500

14861501
return _format_data_model_results(
14871502
entity, entity_lower, tables, columns, endpoints, erd_artifact
@@ -1746,7 +1761,11 @@ async def research_architecture(
17461761
if len(lines) == 1:
17471762
lines.append(f"No specific architecture information found for topic: `{topic}`\n")
17481763
lines.append(
1749-
"Try topics like: api, deployment, database, security, ui, tests, flows"
1764+
"Try topics like: api, deployment, database, security, ui, tests, flows\n"
1765+
)
1766+
lines.append(
1767+
"**If all topics are empty**, the knowledge graph may not be populated yet. "
1768+
"Ensure the source has been synced and extraction completed successfully."
17501769
)
17511770

17521771
return "\n".join(lines)
@@ -1988,7 +2007,15 @@ async def _graph_rag_context(
19882007
or md
19892008
== f"# GraphRAG Context: {query}\n\nFound 0 communities, 0 entities, 0 citations.\n"
19902009
):
1991-
return f"# No Results\n\nNo relevant content found for: {query}"
2010+
return (
2011+
f"# No Results\n\n"
2012+
f"No relevant content found for: {query}\n\n"
2013+
"**Possible causes:**\n"
2014+
"- The knowledge graph has no semantic entities yet — ensure sync completed "
2015+
"with LLM extraction enabled\n"
2016+
"- Community summaries haven't been generated or embedded\n"
2017+
"- Try rephrasing the query with different terms"
2018+
)
19922019

19932020
if rebuild_mode:
19942021
md += (
@@ -2571,9 +2598,13 @@ async def mcp_get_arc42(
25712598

25722599
if not regenerate:
25732600
return (
2574-
"# Error\n\n"
2575-
"arc42 artifact not generated yet. Trigger explicit generation with "
2576-
"`regenerate=true`."
2601+
"# arc42 Not Generated Yet\n\n"
2602+
"The arc42 architecture document hasn't been generated for this collection.\n\n"
2603+
"**To generate it:**\n"
2604+
"1. Ensure the source has been synced (this builds the knowledge graph)\n"
2605+
"2. Call `get_arc42(regenerate=true)` to generate the document\n\n"
2606+
"If `arch_docs_generate_on_sync` is enabled (default), "
2607+
"arc42 is generated automatically during sync."
25772608
)
25782609

25792610
return await _arc42_regenerate(
@@ -2660,7 +2691,12 @@ async def _resolve_drift_baseline(
26602691
)
26612692
).scalar_one_or_none()
26622693
if baseline is None:
2663-
return None, "# Error\n\nBaseline scenario not found in collection."
2694+
return None, (
2695+
"# Error\n\n"
2696+
"Baseline scenario not found in collection.\n\n"
2697+
"Drift reports compare two scenarios. Ensure a baseline scenario exists "
2698+
"by running at least two syncs, or specify a valid `baseline_scenario_id`."
2699+
)
26642700
return baseline, None
26652701

26662702
baseline = None

apps/worker/contextmine_worker/flows.py

Lines changed: 55 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -945,8 +945,10 @@ async def _kg_extract_business_rules(
945945
from contextmine_core.treesitter.languages import detect_language
946946

947947
if changed_doc_ids is not None and len(changed_doc_ids) == 0:
948-
logger.info("No changed documents - skipping business rule extraction")
949-
return 0
948+
if await _kg_has_business_rules(collection_uuid):
949+
logger.info("No changed documents and business rules exist - skipping extraction")
950+
return 0
951+
logger.info("No changed documents but no business rules found - running initial extraction")
950952

951953
all_extractions = []
952954
async with get_session() as session:
@@ -1032,6 +1034,38 @@ async def _kg_extract_surfaces(source_uuid: object, collection_uuid: object) ->
10321034
return result_stats
10331035

10341036

1037+
async def _kg_has_semantic_entities(collection_uuid: object) -> bool:
1038+
"""Check if any SEMANTIC_ENTITY nodes exist for this collection."""
1039+
from contextmine_core.models import KnowledgeNode, KnowledgeNodeKind
1040+
1041+
async with get_session() as session:
1042+
result = await session.execute(
1043+
select(KnowledgeNode.id)
1044+
.where(
1045+
KnowledgeNode.collection_id == collection_uuid,
1046+
KnowledgeNode.kind == KnowledgeNodeKind.SEMANTIC_ENTITY,
1047+
)
1048+
.limit(1)
1049+
)
1050+
return result.scalar_one_or_none() is not None
1051+
1052+
1053+
async def _kg_has_business_rules(collection_uuid: object) -> bool:
1054+
"""Check if any BUSINESS_RULE nodes exist for this collection."""
1055+
from contextmine_core.models import KnowledgeNode, KnowledgeNodeKind
1056+
1057+
async with get_session() as session:
1058+
result = await session.execute(
1059+
select(KnowledgeNode.id)
1060+
.where(
1061+
KnowledgeNode.collection_id == collection_uuid,
1062+
KnowledgeNode.kind == KnowledgeNodeKind.BUSINESS_RULE,
1063+
)
1064+
.limit(1)
1065+
)
1066+
return result.scalar_one_or_none() is not None
1067+
1068+
10351069
async def _kg_step_semantic_entities(
10361070
stats: dict,
10371071
collection_uuid: object,
@@ -1047,8 +1081,15 @@ async def _kg_step_semantic_entities(
10471081
)
10481082

10491083
if changed_doc_ids is not None and len(changed_doc_ids) == 0:
1050-
logger.info("No changed documents - skipping semantic entity extraction")
1051-
return
1084+
# No docs changed — skip only if entities already exist from a prior run
1085+
if await _kg_has_semantic_entities(collection_uuid):
1086+
logger.info(
1087+
"No changed documents and semantic entities exist - skipping extraction"
1088+
)
1089+
return
1090+
logger.info(
1091+
"No changed documents but no semantic entities found - running initial extraction"
1092+
)
10521093
async with get_session() as session:
10531094
extraction_batch = await extract_from_documents(
10541095
collection_id=collection_uuid,
@@ -1112,8 +1153,16 @@ async def _kg_step_summaries(
11121153
from contextmine_core.knowledge.summaries import generate_community_summaries
11131154

11141155
if changed_doc_ids is not None and len(changed_doc_ids) == 0:
1115-
logger.info("No changed documents - skipping community summary regeneration")
1116-
return
1156+
# No docs changed — skip only if semantic entities (and thus summaries)
1157+
# already exist from a prior run
1158+
if await _kg_has_semantic_entities(collection_uuid):
1159+
logger.info(
1160+
"No changed documents and semantic entities exist - skipping summary regeneration"
1161+
)
1162+
return
1163+
logger.info(
1164+
"No changed documents but no semantic entities found - running initial summary generation"
1165+
)
11171166
async with get_session() as session:
11181167
summary_stats = await generate_community_summaries(
11191168
session,

apps/worker/tests/test_flows_final.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -360,10 +360,10 @@ async def mock_exec(stmt):
360360

361361

362362
class TestBuildKGSemanticCommunity:
363-
async def test_skip_semantic_when_no_changed_docs(
363+
async def test_skip_semantic_when_no_changed_docs_and_entities_exist(
364364
self, monkeypatch: pytest.MonkeyPatch
365365
) -> None:
366-
"""Line 935-936: Empty changed_doc_ids skips semantic extraction."""
366+
"""Empty changed_doc_ids skips semantic extraction when entities already exist."""
367367
source_id = str(uuid.uuid4())
368368
collection_id = str(uuid.uuid4())
369369

@@ -375,10 +375,13 @@ async def test_skip_semantic_when_no_changed_docs(
375375

376376
mock_session = AsyncMock()
377377

378+
# Return a non-None value for _kg_has_semantic_entities check
379+
existing_node_id = uuid.uuid4()
380+
378381
async def mock_exec(stmt):
379382
r = MagicMock()
380383
r.all.return_value = []
381-
r.scalar_one_or_none.return_value = None
384+
r.scalar_one_or_none.return_value = existing_node_id
382385
return r
383386

384387
mock_session.execute = mock_exec
@@ -417,6 +420,7 @@ async def mock_exec(stmt):
417420
collection_id=collection_id,
418421
changed_doc_ids=[],
419422
)
423+
# Skips extraction because semantic entities already exist
420424
extract_mock.assert_not_called()
421425

422426
async def test_semantic_extraction_error_caught(self, monkeypatch: pytest.MonkeyPatch) -> None:

packages/core/contextmine_core/analyzer/extractors/ast_utils.py

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,92 @@ def is_pascal_case(value: str) -> bool:
7676
return all(ch.isalnum() or ch == "_" for ch in value)
7777

7878

79+
# ---------------------------------------------------------------------------
80+
# Cross-language AST helpers shared between extractors
81+
# ---------------------------------------------------------------------------
82+
83+
84+
def find_enclosing_class_name(
85+
content: str,
86+
node: Any,
87+
class_type: str = "class_declaration",
88+
name_fields: tuple[str, ...] = ("identifier",),
89+
) -> str | None:
90+
"""Walk up the AST to find the enclosing class name."""
91+
parent = node.parent
92+
while parent is not None:
93+
if parent.type == class_type:
94+
for field_name in name_fields:
95+
name_node = first_child(parent, field_name)
96+
if name_node:
97+
name = node_text(content, name_node).strip()
98+
if name:
99+
return name
100+
break
101+
parent = parent.parent
102+
return None
103+
104+
105+
def ruby_first_string_arg(content: str, call_node: Any) -> str | None:
106+
"""Extract the first string argument from a Ruby call node."""
107+
args = call_node.child_by_field_name("arguments")
108+
if args is None:
109+
for child in call_node.children:
110+
if child.type == "argument_list":
111+
args = child
112+
break
113+
if args is None:
114+
return None
115+
for child in args.children:
116+
if child.type in {"string", "string_literal"}:
117+
return unquote(node_text(content, child))
118+
return None
119+
120+
121+
def java_annotation_names(content: str, node: Any) -> list[str]:
122+
"""Extract annotation names from a Java method/class node's modifiers."""
123+
names: list[str] = []
124+
parent = node.parent
125+
if parent is None:
126+
return names
127+
for child in parent.children:
128+
if child.type == "modifiers":
129+
for mod in child.children:
130+
if mod.type in {"marker_annotation", "annotation"}:
131+
name_node = first_child(mod, "identifier")
132+
if name_node:
133+
names.append(node_text(content, name_node).strip().lower())
134+
return names
135+
136+
137+
def csharp_attribute_names(content: str, node: Any) -> set[str]:
138+
"""Extract attribute names from a C# node's preceding attribute_list siblings."""
139+
attrs: set[str] = set()
140+
parent = node.parent
141+
if parent is None:
142+
return attrs
143+
for child in parent.children:
144+
if child is node:
145+
break
146+
if child.type == "attribute_list":
147+
for attr in walk(child):
148+
if attr.type in {"identifier", "attribute"}:
149+
name = node_text(content, attr).strip().lower()
150+
if name.endswith("attribute"):
151+
name = name[: -len("attribute")]
152+
attrs.add(name)
153+
# Also check direct children
154+
for child in node.children:
155+
if child.type == "attribute_list":
156+
for attr in walk(child):
157+
if attr.type in {"identifier", "attribute"}:
158+
name = node_text(content, attr).strip().lower()
159+
if name.endswith("attribute"):
160+
name = name[: -len("attribute")]
161+
attrs.add(name)
162+
return attrs
163+
164+
79165
# ---------------------------------------------------------------------------
80166
# JS/TS AST helpers shared between the tests and UI extractors
81167
# ---------------------------------------------------------------------------

packages/core/contextmine_core/analyzer/extractors/flows.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -142,12 +142,6 @@ def synthesize_user_flows(
142142
route_to_symbol_hints, route_to_navigation_hints = _collect_route_hints_from_ui(ui_extractions)
143143
symbol_to_test_refs = _collect_symbol_test_refs(test_extractions)
144144

145-
for test_file in test_extractions:
146-
for case in test_file.cases:
147-
for symbol_hint in case.symbol_hints:
148-
symbol_to_test_refs.setdefault(symbol_hint.lower(), [])
149-
symbol_to_test_refs[symbol_hint.lower()].append(case.natural_key)
150-
151145
synthesis = FlowSynthesis()
152146
for route, symbol_hints in sorted(route_to_symbol_hints.items()):
153147
deduped_hints = list(

0 commit comments

Comments (0)