Expose answer evidence score cutoff as config (#1031)

mskarlin · web-flow · commit 2ca6b8ff7d39 · 2025-07-25T14:45:07.000-07:00
diff --git a/README.md b/README.md
@@ -885,6 +885,7 @@ will return much faster than the first query and we'll be certain the authors ma
 | `answer.max_concurrent_requests`             | `4`                                    | Max concurrent requests to LLMs.                                                                        |
 | `answer.answer_filter_extra_background`      | `False`                                | Whether to cite background info from model.                                                             |
 | `answer.get_evidence_if_no_contexts`         | `True`                                 | Allow lazy evidence gathering.                                                                          |
+| `answer.evidence_relevance_score_cutoff`     | `1`                                    | Minimum evidence relevance score (inclusive) required for inclusion in the answer context.              |
 | `parsing.chunk_size`                         | `5000`                                 | Characters per chunk (0 for no chunking).                                                               |
 | `parsing.page_size_limit`                    | `1,280,000`                            | Character limit per page.                                                                               |
 | `parsing.pdfs_use_block_parsing`             | `False`                                | Opt-in flag for block-based PDF parsing over text-based PDF parsing.                                    |
diff --git a/src/paperqa/docs.py b/src/paperqa/docs.py
@@ -768,8 +768,12 @@ async def aquery(  # noqa: PLR0912
             contexts,
             key=lambda x: (-x.score, x.text.name),
         )[: answer_config.answer_max_sources]
-        # remove any contexts with a score of 0
-        filtered_contexts = [c for c in filtered_contexts if c.score > 0]
+        # remove any contexts with a score below the cutoff
+        filtered_contexts = [
+            c
+            for c in filtered_contexts
+            if c.score >= answer_config.evidence_relevance_score_cutoff
+        ]
 
         # shim deprecated flag
         # TODO: remove in v6
diff --git a/src/paperqa/settings.py b/src/paperqa/settings.py
@@ -77,6 +77,15 @@ class AnswerSettings(BaseModel):
         default=True,
         description="Whether to use retrieval instead of processing all docs.",
     )
+    # no upper-bound validator because the score range can be set in a prompt
+    evidence_relevance_score_cutoff: int = Field(
+        default=1,
+        ge=0,
+        description=(
+            "Relevance score cutoff for evidence retrieval, default is 1, meaning"
+            " only evidence with relevance score >= 1 will be used."
+        ),
+    )
     evidence_summary_length: str = Field(
         default="about 100 words", description="Length of evidence summary."
     )