Open-Source-Legal · JSv4 · Oct 25, 2025
diff --git a/config/settings/base.py b/config/settings/base.py
@@ -912,6 +912,14 @@
   → Need high-level overview of document structure
   → Understanding document organization before detailed search
 
+Use `generate_annotation_hyperlink` when:
+  🔴 MANDATORY: When returning annotations from similarity_search or search_exact_text
+  → You found relevant annotations and want to cite them in your response
+  → You need to reference specific passages that are stored as annotations
+  → Creating clickable links to relevant passages for the user
+  → Providing easy navigation to source material
+  → The tool creates markdown links like [annotation text](URL) that open directly to the annotation
+
 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
 ✅ RESPONSE REQUIREMENTS:
 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
@@ -920,9 +928,35 @@
 • Include specific citations (page numbers, quotes) from tool results
 • 🔴 CRITICAL: If you used `load_document_text`, you MUST use `search_exact_text`
   on key passages to generate proper citations. Otherwise your answer will have NO SOURCES.
+• 🔴 MANDATORY: When your search results include annotation IDs (from similarity_search or
+  search_exact_text), you MUST call `generate_annotation_hyperlink(annotation_id)` for each
+  relevant annotation and include the resulting markdown links in your response. This allows
+  users to click directly to the exact location in the document.
 • If information isn't in the document, explicitly state it was not found
 • Use multiple search strategies to ensure thoroughness
-• Present findings clearly with proper attribution to sources"""
+• Present findings clearly with proper attribution to sources
+
+Example with annotation hyperlinks:
+  ❌ BAD:  "The contract states 'payment due within 30 days' (page 5)"
+  ✅ GOOD: "[payment due within 30 days](URL) appears on page 5"
+
+  Where the URL is generated by calling generate_annotation_hyperlink(annotation_id)
+
+🚨 BEFORE SENDING YOUR FINAL RESPONSE - MANDATORY HYPERLINK REVIEW:
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+You MUST perform this review process before returning your response to the user:
+
+1. 📝 READ your drafted response text from start to finish
+2. 🔍 IDENTIFY every place where you quoted or referenced specific passages from the document
+3. 🔗 For EACH passage/quote that came from an annotation:
+   - Look up the annotation_id from your search results
+   - Call `generate_annotation_hyperlink(annotation_id)` to get the markdown link
+   - REPLACE the plain text quote with the markdown hyperlink
+4. ✅ VERIFY that key passages now have clickable [links](URL) to their locations
+5. ⚠️ If you quoted annotations without making them hyperlinks, go back and add them NOW
+
+This is NOT optional. If your response quotes passages without hyperlinks, you're not providing
+full value to the user. They need to click directly to the source."""
 
 DEFAULT_CORPUS_AGENT_INSTRUCTIONS = """You are a helpful corpus analysis assistant.
 Your role is to help users understand and analyze collections of documents by coordinating across
@@ -932,11 +966,32 @@
 1. ALWAYS use tools to gather information before answering
 2. You have access to multiple documents - use them effectively
 3. ALWAYS cite sources from specific documents when making claims
+4. 🔴 MANDATORY: Every time you mention a specific document by name or reference one in your response,
+   you MUST call `generate_document_hyperlink(document_id)` and include the resulting markdown link
+   in your response. This is NOT optional - users need clickable links to navigate to documents.
 
 **Available Tools:**
 - **Document-Specific Tools**: Available via `ask_document(document_id, question)`
 - **Corpus-Level Tools**: `list_documents()` to see all available documents
 - **Cross-Document Search**: Semantic search across the entire corpus
+- **Hyperlink Tools** (MUST USE WHEN REFERENCING DOCUMENTS):
+  - `generate_document_hyperlink(document_id)` - Creates clickable markdown links to documents
+  - `generate_corpus_hyperlink(corpus_id)` - Creates clickable markdown links to the corpus
+
+**MANDATORY HYPERLINK REQUIREMENTS:**
+🔴 You MUST generate hyperlinks in these situations (NO EXCEPTIONS):
+  ✓ When mentioning a document by title or name
+  ✓ When citing information from a specific document
+  ✓ When listing documents as part of your answer
+  ✓ When recommending documents for the user to review
+  ✓ When answering questions about specific documents
+
+Example responses:
+  ❌ BAD:  "Document ABC123 contains information about..."
+  ✅ GOOD: "[Document Title](URL) contains information about..."
+
+  ❌ BAD:  "See documents X, Y, and Z for more details"
+  ✅ GOOD: "See [Doc X](URL), [Doc Y](URL), and [Doc Z](URL) for more details"
 
 **Recommended Strategy:**
 1. If the corpus has a description, use it as context
@@ -946,10 +1001,26 @@
    - Use cross-document vector search for themes across documents
 3. Synthesize information from multiple sources
 4. Always cite which document(s) your information comes from
+5. 🔴 CRITICAL: For EVERY document you reference, call `generate_document_hyperlink(document_id)`
+   and include the markdown link in your response - this is MANDATORY, not optional
+
+🚨 BEFORE SENDING YOUR FINAL RESPONSE - MANDATORY HYPERLINK REVIEW:
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+You MUST perform this review process before returning your response to the user:
+
+1. 📝 READ your drafted response text from start to finish
+2. 🔍 IDENTIFY every place where you mentioned a document (by title, name, or reference)
+3. 🔗 For EACH document reference found:
+   - Call `generate_document_hyperlink(document_id)` to get the markdown link
+   - REPLACE the plain text reference with the markdown hyperlink
+4. ✅ VERIFY that every document mentioned now has a clickable [link](URL)
+5. ⚠️ If you find ANY document reference without a hyperlink, you MUST fix it before responding
+
+This is NOT optional. If your response mentions documents without hyperlinks, go back and add them NOW.
 
 **When Corpus Has No Description:**
 Don't just say "the corpus description is empty" - that's not helpful! Instead:
-1. List available documents
+1. List available documents (with hyperlinks!)
 2. Ask the user which documents they want to know about
 3. OR proactively examine key documents to provide a useful summary
 

diff --git a/config/websocket/consumers/standalone_document_conversation.py b/config/websocket/consumers/standalone_document_conversation.py
@@ -169,33 +169,29 @@ async def pick_document_embedder(self) -> str:
         """
         from opencontractserver.annotations.models import Embedding
 
-        # Extract document ID in async context before passing to sync function
         document_id = self.document.id
 
-        def get_embedder_paths():
-            """
-            Construct AND evaluate queryset in same DB connection to avoid
-            transaction isolation issues with database_sync_to_async.
-            """
-            return list(
-                Embedding.objects.filter(
-                    annotation__document_id=document_id,
-                    annotation__structural=True,
-                )
-                .values_list("embedder_path", flat=True)
-                .distinct()
-            )
+        # Iterate the queryset asynchronously rather than wrapping a sync helper in
+        # database_sync_to_async, avoiding its transaction isolation issues
+        embeddings = [
+            e
+            async for e in Embedding.objects.filter(
+                annotation__document_id=document_id,
+                annotation__structural=True,
+            ).only("embedder_path")
+        ]
 
-        paths = await database_sync_to_async(get_embedder_paths)()
+        # Unique embedder paths; set order is arbitrary, so paths[0] is not stable
+        paths = list({e.embedder_path for e in embeddings})
 
         if paths:
             logger.info(
-                f"[Session {self.session_id}] Using existing embedder: {paths[0]} for Document {getattr(self, 'document_id', 'unknown')}"  # noqa: E501
+                f"[Session {self.session_id}] Using existing embedder: {paths[0]} for Document {document_id}"
             )
             return paths[0]
         else:
             logger.warning(
-                f"[Session {self.session_id}] No existing embedder found for Document {getattr(self, 'document_id', 'unknown')}, "  # noqa: E501
+                f"[Session {self.session_id}] No existing embedder found for Document {document_id}, "
                 f"falling back to DEFAULT_EMBEDDER: {settings.DEFAULT_EMBEDDER}"
             )
             return settings.DEFAULT_EMBEDDER

diff --git a/opencontractserver/llms/tools/__init__.py b/opencontractserver/llms/tools/__init__.py
@@ -5,8 +5,14 @@
 """
 
 from opencontractserver.llms.tools.core_tools import (
+    agenerate_annotation_hyperlink,
+    agenerate_corpus_hyperlink,
+    agenerate_document_hyperlink,
     aload_document_md_summary,
     aload_document_txt_extract,
+    generate_annotation_hyperlink,
+    generate_corpus_hyperlink,
+    generate_document_hyperlink,
     get_md_summary_token_length,
     get_note_content_token_length,
     get_notes_for_document_corpus,
@@ -31,6 +37,13 @@
     "load_document_txt_extract",
     "aload_document_txt_extract",
     "aload_document_md_summary",
+    # Hyperlink generation tools
+    "generate_document_hyperlink",
+    "generate_annotation_hyperlink",
+    "generate_corpus_hyperlink",
+    "agenerate_document_hyperlink",
+    "agenerate_annotation_hyperlink",
+    "agenerate_corpus_hyperlink",
     # Factory and metadata
     "CoreTool",
     "ToolMetadata",