Commit 608850e

Support grouping contexts by question in the final context (#1032)
1 parent 2ca6b8f commit 608850e

File tree

4 files changed: +120 −16 lines


README.md

Lines changed: 1 addition & 0 deletions
@@ -885,6 +885,7 @@ will return much faster than the first query and we'll be certain the authors ma
 | `answer.max_concurrent_requests` | `4` | Max concurrent requests to LLMs. |
 | `answer.answer_filter_extra_background` | `False` | Whether to cite background info from model. |
 | `answer.get_evidence_if_no_contexts` | `True` | Allow lazy evidence gathering. |
+| `answer.group_contexts_by_question` | `False` | Groups the final contexts by the underlying `gather_evidence` question in the final context prompt. |
 | `answer.evidence_relevance_score_cutoff` | `1` | Cutoff evidence relevance score to include in the answer context (inclusive) |
 | `parsing.chunk_size` | `5000` | Characters per chunk (0 for no chunking). |
 | `parsing.page_size_limit` | `1,280,000` | Character limit per page. |
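For example, a minimal sketch of turning the new flag on (this assumes the `Settings` and `ask` entry points documented elsewhere in the README; the question string is a placeholder):

from paperqa import Settings, ask

# Enable question-grouped contexts in the final context prompt;
# the flag defaults to False.
settings = Settings(answer={"group_contexts_by_question": True})

answer = ask("Is XAI usable in chemistry?", settings=settings)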

src/paperqa/docs.py

Lines changed: 43 additions & 15 deletions
@@ -7,6 +7,7 @@
 import tempfile
 import urllib.request
 import warnings
+from collections import defaultdict
 from collections.abc import Callable, Sequence
 from datetime import datetime
 from io import BytesIO
@@ -29,7 +30,7 @@
 from paperqa.prompts import CANNOT_ANSWER_PHRASE
 from paperqa.readers import read_doc
 from paperqa.settings import MaybeSettings, get_settings
-from paperqa.types import Doc, DocDetails, DocKey, PQASession, Text
+from paperqa.types import Context, Doc, DocDetails, DocKey, PQASession, Text
 from paperqa.utils import (
     citation_to_docname,
     get_loop,
@@ -785,26 +786,53 @@ async def aquery(  # noqa: PLR0912
         # Only keep "\nFrom {citation}" if we are showing detailed citations
         context_inner_prompt = context_inner_prompt.replace("\nFrom {citation}", "")

-        inner_context_strs = [
-            context_inner_prompt.format(
-                name=c.id,
-                text=c.context,
-                citation=c.text.doc.formatted_citation,
-                **(c.model_extra or {}),
-            )
-            for c in filtered_contexts
-        ]
+        context_str_body = ""
+        if answer_config.group_contexts_by_question:
+            contexts_by_question: dict[str, list[Context]] = defaultdict(list)
+            for c in filtered_contexts:
+                # Fallback to the main session question if not available.
+                # question attribute is optional, so if a user
+                # sets contexts externally, it may not have a question.
+                question = getattr(c, "question", session.question)
+                contexts_by_question[question].append(c)
+
+            context_sections = []
+            for question, contexts_in_group in contexts_by_question.items():
+                inner_strs = [
+                    context_inner_prompt.format(
+                        name=c.id,
+                        text=c.context,
+                        citation=c.text.doc.formatted_citation,
+                        **(c.model_extra or {}),
+                    )
+                    for c in contexts_in_group
+                ]
+                # Create a section with a question heading
+                section_header = f'Contexts related to the question: "{question}"'
+                section = f"{section_header}\n\n" + "\n\n".join(inner_strs)
+                context_sections.append(section)
+            context_str_body = "\n\n----\n\n".join(context_sections)
+        else:
+            inner_context_strs = [
+                context_inner_prompt.format(
+                    name=c.id,
+                    text=c.context,
+                    citation=c.text.doc.formatted_citation,
+                    **(c.model_extra or {}),
+                )
+                for c in filtered_contexts
+            ]
+            context_str_body = "\n\n".join(inner_context_strs)
+
         if pre_str:
-            inner_context_strs += (
-                [f"Extra background information: {pre_str}"] if pre_str else []
-            )
+            context_str_body += f"\n\nExtra background information: {pre_str}"

         context_str = prompt_config.context_outer.format(
-            context_str="\n\n".join(inner_context_strs),
+            context_str=context_str_body,
             valid_keys=", ".join([c.id for c in filtered_contexts]),
         )

-        if len(context_str) < 10:  # noqa: PLR2004
+        if len(context_str_body.strip()) < 10:  # noqa: PLR2004
             answer_text = (
                 f"{CANNOT_ANSWER_PHRASE} this question due to insufficient information."
             )
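To make the grouped prompt shape concrete, here is a self-contained sketch of the same string assembly over plain tuples; the `Context` objects, prompt templates, and `pre_str` handling are stripped out, so only the grouping and joining logic from the hunk above is reproduced:

from collections import defaultdict

# (question, formatted inner context) stand-ins for filtered_contexts.
contexts = [
    ("Is XAI usable in chemistry?", "Explanation about XAI and molecules."),
    ("Is XAI usable in chemistry?", "Details on how drug discovery leverages AI."),
    ("What is organic chemistry?", "General facts about organic chemistry."),
]

contexts_by_question: dict[str, list[str]] = defaultdict(list)
for question, inner in contexts:
    contexts_by_question[question].append(inner)

sections = [
    f'Contexts related to the question: "{question}"\n\n' + "\n\n".join(inners)
    for question, inners in contexts_by_question.items()
]
# Sections are separated by the same "----" rule the diff inserts.
context_str_body = "\n\n----\n\n".join(sections)
print(context_str_body)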

src/paperqa/settings.py

Lines changed: 4 additions & 0 deletions
@@ -120,6 +120,10 @@ class AnswerSettings(BaseModel):
             " called before evidence was gathered."
         ),
     )
+    group_contexts_by_question: bool = Field(
+        default=False,
+        description="Whether to group contexts by question when generating answers.",
+    )

     @model_validator(mode="after")
     def _deprecated_field(self) -> Self:
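As a quick sanity check of the new field (a sketch; both forms below should be equivalent, since `Settings` nests an `AnswerSettings` under its `answer` attribute):

from paperqa.settings import AnswerSettings, Settings

# The field defaults to False and can be set directly on AnswerSettings.
assert AnswerSettings().group_contexts_by_question is False
assert AnswerSettings(group_contexts_by_question=True).group_contexts_by_question

# Nested-dict form, as used in the test below.
settings = Settings(answer={"group_contexts_by_question": True})
assert settings.answer.group_contexts_by_question is True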

tests/test_paperqa.py

Lines changed: 72 additions & 1 deletion
@@ -52,7 +52,7 @@
 from paperqa.prompts import CANNOT_ANSWER_PHRASE
 from paperqa.prompts import qa_prompt as default_qa_prompt
 from paperqa.readers import PDFParserFn, read_doc
-from paperqa.types import ChunkMetadata
+from paperqa.types import ChunkMetadata, Context
 from paperqa.utils import (
     clean_possessives,
     encode_id,
@@ -566,6 +566,77 @@ async def test_query(docs_fixture) -> None:
     await docs_fixture.aquery("Is XAI usable in chemistry?", settings=settings)


+@pytest.mark.asyncio
+async def test_aquery_groups_contexts_by_question(docs_fixture) -> None:
+
+    session = PQASession(question="What is the relationship between chemistry and AI?")
+
+    doc = Doc(docname="test_doc", citation="Test Doc, 2025", dockey="key1")
+    text1 = Text(text="XAI is useful for molecules.", name="t1", doc=doc)
+    text2 = Text(text="Drug discovery uses AI.", name="t2", doc=doc)
+    text3 = Text(text="Organic chemistry is a field.", name="t3", doc=doc)
+
+    session.contexts = [
+        Context(
+            text=text1,
+            context="Explanation about XAI and molecules.",
+            score=6,
+            question="Is XAI usable in chemistry?",
+        ),
+        Context(
+            text=text2,
+            context="Details on how drug discovery leverages AI.",
+            score=5,
+            question="Is XAI usable in chemistry?",
+        ),
+        Context(
+            text=text3,
+            context="General facts about organic chemistry.",
+            score=5,
+            question="What is organic chemistry?",
+        ),
+    ]
+
+    settings = Settings(
+        prompts={"answer_iteration_prompt": None},
+        answer={"group_contexts_by_question": True},
+    )
+
+    result = await docs_fixture.aquery(session, settings=settings)
+
+    final_context_str = result.context
+
+    assert (
+        'Contexts related to the question: "Is XAI usable in chemistry?"'
+        in final_context_str
+    )
+
+    assert (
+        'Contexts related to the question: "What is organic chemistry?"'
+        in final_context_str
+    )
+
+    assert "Explanation about XAI and molecules." in final_context_str
+    assert "Details on how drug discovery leverages AI." in final_context_str
+    assert "General facts about organic chemistry." in final_context_str
+
+    assert "\n\n----\n\n" in final_context_str
+    q1_header_pos = final_context_str.find(
+        'Contexts related to the question: "Is XAI usable in chemistry?"'
+    )
+    q2_header_pos = final_context_str.find(
+        'Contexts related to the question: "What is organic chemistry?"'
+    )
+    context1_pos = final_context_str.find("Explanation about XAI and molecules.")
+    context3_pos = final_context_str.find("General facts about organic chemistry.")
+
+    assert (
+        0 == q1_header_pos < context1_pos
+    ), "Expected q1 header to be first, and the context to follow."
+    assert q1_header_pos < q2_header_pos
+    assert q2_header_pos < context3_pos
+
+
 @pytest.mark.asyncio
 async def test_query_with_iteration(docs_fixture) -> None:
     # we store these results to check that the prompts are OK
