
Commit ad16646

mskarlin and Copilot authored
Allow users to skip citation stripping from evidence (#1035)
Co-authored-by: Copilot <[email protected]>
1 parent a1571f4 · commit ad16646

File tree

5 files changed: +20 −5 lines changed


README.md

Lines changed: 1 addition & 0 deletions
@@ -887,6 +887,7 @@ will return much faster than the first query and we'll be certain the authors ma
 | `answer.get_evidence_if_no_contexts` | `True` | Allow lazy evidence gathering. |
 | `answer.group_contexts_by_question` | `False` | Groups the final contexts by the underlying `gather_evidence` question in the final context prompt. |
 | `answer.evidence_relevance_score_cutoff` | `1` | Cutoff evidence relevance score to include in the answer context (inclusive) |
+| `answer.skip_evidence_citation_strip` | `False` | Skip removal of citations from the `gather_evidence` contexts |
 | `parsing.chunk_size` | `5000` | Characters per chunk (0 for no chunking). |
 | `parsing.page_size_limit` | `1,280,000` | Character limit per page. |
 | `parsing.pdfs_use_block_parsing` | `False` | Opt-in flag for block-based PDF parsing over text-based PDF parsing. |
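
A minimal usage sketch for the new option via the standard `paperqa` `Docs`/`Settings` workflow; the document path and query string below are purely illustrative:

```python
from paperqa import Docs, Settings

# Keep citations such as "(Smith 1999)" inside gathered evidence contexts
# instead of stripping them before the answer prompt is built.
settings = Settings(answer={"skip_evidence_citation_strip": True})

docs = Docs()
# docs.add("my_paper.pdf")  # hypothetical document to index
# answer = await docs.aquery("Is XAI usable in chemistry?", settings=settings)
```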

src/paperqa/core.py

Lines changed: 5 additions & 1 deletion
@@ -135,6 +135,7 @@ async def map_fxn_summary(
     extra_prompt_data: dict[str, str] | None = None,
     parser: Callable[[str], dict[str, Any]] | None = None,
     callbacks: Sequence[Callable[[str], None]] | None = None,
+    skip_citation_strip: bool = False,
 ) -> tuple[Context, LLMResult]:
     """Parses the given text and returns a context object with the parser and prompt runner.
 
@@ -152,6 +153,7 @@ async def map_fxn_summary(
         parser: Optional parser function to parse LLM output into structured data.
             Should return dict with at least 'summary' field.
         callbacks: Optional sequence of callback functions to execute during LLM calls.
+        skip_citation_strip: Optional skipping of citation stripping, if you want to keep in the context.
 
     Returns:
         The context object and LLMResult to get info about the LLM execution.
@@ -206,7 +208,9 @@ async def map_fxn_summary(
             score = 5
             success = True
     # remove citations that collide with our grounded citations (for the answer LLM)
-    context = strip_citations(context)
+    if not skip_citation_strip:
+        context = strip_citations(context)
+
     if not success:
         score = extract_score(context)
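
The behavior change is confined to the branch above: `strip_citations` is only applied when `skip_citation_strip` is left at its default of `False`. A rough, self-contained illustration of that branch follows; the regex helper is a simplified stand-in, not paperqa's actual `strip_citations` implementation:

```python
import re


def strip_citations_sketch(text: str) -> str:
    """Simplified stand-in for paperqa's strip_citations: drops parenthetical
    author-year citations like "(Smith 1999)" from a context string."""
    return re.sub(r"\s*\([A-Z][A-Za-z]+(?: et al\.)?,? \d{4}[a-z]?\)", "", text)


context = "Explanation about XAI and molecules (Smith 1999)."
skip_citation_strip = False  # the new keyword argument, default False

# Mirrors the changed lines in map_fxn_summary: strip only when not skipped.
if not skip_citation_strip:
    context = strip_citations_sketch(context)

print(context)  # -> "Explanation about XAI and molecules."
```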

src/paperqa/docs.py

Lines changed: 1 addition & 0 deletions
@@ -669,6 +669,7 @@ async def aget_evidence(
                     },
                     parser=llm_parse_json if prompt_config.use_json else None,
                     callbacks=callbacks,
+                    skip_citation_strip=answer_config.skip_evidence_citation_strip,
                 )
                 for m in matches
             ],
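
Here `aget_evidence` simply threads `answer_config.skip_evidence_citation_strip` into each `map_fxn_summary` call. A sketch of invoking it with the flag enabled; the document path is hypothetical and the exact `aget_evidence` keyword arguments may differ between paperqa versions:

```python
import asyncio

from paperqa import Docs, Settings


async def main() -> None:
    docs = Docs()
    # await docs.aadd("my_paper.pdf")  # hypothetical document to index first
    session = await docs.aget_evidence(
        "Is XAI usable in chemistry?",
        settings=Settings(answer={"skip_evidence_citation_strip": True}),
    )
    for ctx in session.contexts:
        # citations emitted by the summary LLM are no longer stripped here
        print(ctx.context)


asyncio.run(main())
```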

src/paperqa/settings.py

Lines changed: 4 additions & 0 deletions
@@ -124,6 +124,10 @@ class AnswerSettings(BaseModel):
         default=False,
         description="Whether to group contexts by question when generating answers.",
     )
+    skip_evidence_citation_strip: bool = Field(
+        default=False,
+        description="Whether to skip stripping citations from evidence.",
+    )
 
     @model_validator(mode="after")
     def _deprecated_field(self) -> Self:
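
Because the new `AnswerSettings` field defaults to `False`, existing configurations keep stripping citations unless they opt in. A short sketch of both forms:

```python
from paperqa import Settings

# Default: citations are still stripped, so existing behavior is unchanged.
assert Settings().answer.skip_evidence_citation_strip is False

# Opt in via the nested "answer" dict, which populates AnswerSettings.
settings = Settings(answer={"skip_evidence_citation_strip": True})
assert settings.answer.skip_evidence_citation_strip is True
```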

tests/test_paperqa.py

Lines changed: 9 additions & 4 deletions
@@ -579,7 +579,7 @@ async def test_aquery_groups_contexts_by_question(docs_fixture) -> None:
     session.contexts = [
         Context(
             text=text1,
-            context="Explanation about XAI and molecules.",
+            context="Explanation about XAI and molecules (Smith 1999).",
             score=6,
             question="Is XAI usable in chemistry?",
         ),
@@ -599,7 +599,10 @@ async def test_aquery_groups_contexts_by_question(docs_fixture) -> None:
 
     settings = Settings(
         prompts={"answer_iteration_prompt": None},
-        answer={"group_contexts_by_question": True},
+        answer={
+            "group_contexts_by_question": True,
+            "skip_evidence_citation_strip": True,
+        },
     )
 
     result = await docs_fixture.aquery(session, settings=settings)
@@ -616,7 +619,7 @@ async def test_aquery_groups_contexts_by_question(docs_fixture) -> None:
         in final_context_str
     )
 
-    assert "Explanation about XAI and molecules." in final_context_str
+    assert "Explanation about XAI and molecules (Smith 1999)." in final_context_str
     assert "Details on how drug discovery leverages AI." in final_context_str
     assert "General facts about organic chemistry." in final_context_str
 
@@ -627,7 +630,9 @@ async def test_aquery_groups_contexts_by_question(docs_fixture) -> None:
     q2_header_pos = final_context_str.find(
         'Contexts related to the question: "What is organic chemistry?"'
     )
-    context1_pos = final_context_str.find("Explanation about XAI and molecules.")
+    context1_pos = final_context_str.find(
+        "Explanation about XAI and molecules (Smith 1999)."
+    )
     context3_pos = final_context_str.find("General facts about organic chemistry.")
 
     assert (

0 commit comments
