Skip to content

Commit a88bb6a

Browse files
authored
Began deprecation cycle of exclude_text_filter (#994)
1 parent c74aab7 commit a88bb6a

File tree

2 files changed

+29
-12
lines changed

2 files changed

+29
-12
lines changed

paperqa/docs.py

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -605,28 +605,32 @@ async def aget_evidence(
605605
if summary_llm_model is None:
606606
summary_llm_model = evidence_settings.get_summary_llm()
607607

608-
exclude_text_filter = exclude_text_filter or set()
609-
exclude_text_filter |= {c.text.name for c in session.contexts}
610-
611-
_k = answer_config.evidence_k
612-
if exclude_text_filter:
613-
# Increase k to retrieve so we have enough to down-select after retrieval
614-
_k += len(exclude_text_filter)
608+
if exclude_text_filter is not None:
609+
text_name = Text.__name__
610+
warnings.warn(
611+
(
612+
"The 'exclude_text_filter' argument did not work as intended"
613+
f" due to a mix-up in excluding {text_name}.name vs {text_name}."
614+
f" This bug enabled us to have 2+ contexts per {text_name}, so to"
615+
" first-class that capability and simplify our implementation,"
616+
" we're removing the 'exclude_text_filter' argument."
617+
" This deprecation will conclude in version 6"
618+
),
619+
category=DeprecationWarning,
620+
stacklevel=2,
621+
)
615622

616623
if answer_config.evidence_retrieval:
617624
matches = await self.retrieve_texts(
618625
session.question,
619-
_k,
626+
answer_config.evidence_k,
620627
evidence_settings,
621628
embedding_model,
622629
partitioning_fn=partitioning_fn,
623630
)
624631
else:
625632
matches = self.texts
626633

627-
if exclude_text_filter:
628-
matches = [m for m in matches if m.text not in exclude_text_filter]
629-
630634
matches = (
631635
matches[: answer_config.evidence_k]
632636
if answer_config.evidence_retrieval

tests/test_paperqa.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -458,7 +458,7 @@ async def test_docs_lifecycle(subtests: SubTests, stub_data_dir: Path) -> None:
458458

459459

460460
@pytest.mark.asyncio
461-
async def test_evidence(docs_fixture) -> None:
461+
async def test_evidence(docs_fixture: Docs) -> None:
462462
debug_settings = Settings.from_name("debug")
463463
evidence = (
464464
await docs_fixture.aget_evidence(
@@ -470,6 +470,19 @@ async def test_evidence(docs_fixture) -> None:
470470
assert len({e.context for e in evidence}) == len(
471471
evidence
472472
), "Expected unique contexts"
473+
texts = {c.text for c in evidence}
474+
assert texts, "Below assertions require at least one text to be used"
475+
476+
# Okay, let's check we can get other evidence using the same underlying sources
477+
other_evidence = (
478+
await docs_fixture.aget_evidence(
479+
PQASession(question="What is an acronym for explainable AI?"),
480+
settings=debug_settings,
481+
)
482+
).contexts
483+
assert texts.intersection(
484+
{c.text for c in other_evidence}
485+
), "We should be able to reuse sources across evidence calls"
473486

474487

475488
@pytest.mark.asyncio

0 commit comments

Comments
 (0)