Skip to content

Commit a1c53d3

Browse files
authored
Documenting citation style (#817)
1 parent f653903 commit a1c53d3

File tree

3 files changed

+25
-19
lines changed

3 files changed

+25
-19
lines changed

paperqa/docs.py

Lines changed: 4 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
from paperqa.settings import MaybeSettings, get_settings
4343
from paperqa.types import Doc, DocDetails, DocKey, PQASession, Text
4444
from paperqa.utils import (
45+
citation_to_docname,
4546
gather_with_concurrency,
4647
get_loop,
4748
maybe_is_html,
@@ -306,23 +307,7 @@ async def aadd( # noqa: PLR0912
306307
):
307308
citation = f"Unknown, {os.path.basename(path)}, {datetime.now().year}"
308309

309-
if docname is None:
310-
# get first name and year from citation
311-
match = re.search(r"([A-Z][a-z]+)", citation)
312-
if match is not None:
313-
author = match.group(1)
314-
else:
315-
# panicking - no word??
316-
raise ValueError(
317-
f"Could not parse docname from citation {citation}. "
318-
"Consider just passing key explicitly - e.g. docs.py "
319-
"(path, citation, key='mykey')"
320-
)
321-
year = ""
322-
match = re.search(r"(\d{4})", citation)
323-
if match is not None:
324-
year = match.group(1)
325-
docname = f"{author}{year}"
310+
docname = citation_to_docname(citation) if docname is None else docname
326311
docname = self._get_unique_name(docname)
327312

328313
doc = Doc(docname=docname, citation=citation, dockey=dockey)
@@ -801,8 +786,8 @@ async def aquery( # noqa: PLR0912
801786
answer_text = answer_result.text
802787
session.add_tokens(answer_result)
803788
# it still happens
804-
if prompt_config.EXAMPLE_CITATION in answer_text:
805-
answer_text = answer_text.replace(prompt_config.EXAMPLE_CITATION, "")
789+
if (ex_citation := prompt_config.EXAMPLE_CITATION) in answer_text:
790+
answer_text = answer_text.replace(ex_citation, "")
806791
for c in filtered_contexts:
807792
name = c.text.name
808793
citation = c.text.doc.formatted_citation

paperqa/settings.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,7 @@ def get_formatted_variables(s: str) -> set[str]:
259259
class PromptSettings(BaseModel):
260260
model_config = ConfigDict(extra="forbid", validate_assignment=True)
261261

262+
# MLA parenthetical in-text citation, SEE: https://nwtc.libguides.com/citations/MLA#s-lg-box-707489
262263
EXAMPLE_CITATION: ClassVar[str] = "(Example2012Example pages 3-4)"
263264

264265
summary: str = summary_prompt

paperqa/utils.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -544,3 +544,23 @@ def logging_filters(
544544
log_with_filter = logging.getLogger(logger_name)
545545
for log_filter_to_remove in log_filters_to_remove:
546546
log_with_filter.removeFilter(log_filter_to_remove)
547+
548+
549+
def citation_to_docname(citation: str) -> str:
    """Create a docname that follows MLA parenthetical in-text citation.

    The docname is the first capitalized word found in the citation (an
    uppercase ASCII letter followed by one or more lowercase ASCII letters),
    immediately followed by the first run of four digits found in the
    citation, if any. For example, "Smith, J. Some Title. 2020" gives
    "Smith2020".

    Args:
        citation: Free-form citation text to derive the docname from.

    Returns:
        The capitalized word concatenated with the four digits, or just the
        capitalized word when the citation contains no four-digit run.

    Raises:
        ValueError: If the citation contains no capitalized word, so no
            docname can be derived.
    """
    # The first capitalized word stands in for the author's name; nothing
    # here verifies that it really is one.
    author_match = re.search(r"([A-Z][a-z]+)", citation)
    if author_match is None:
        # Nothing usable to build a name from: tell the caller how to bypass
        # the parsing by supplying the docname themselves.
        raise ValueError(
            f"Could not parse docname from citation {citation}. "
            "Consider passing docname explicitly - e.g. docname='mykey'"
        )

    # The year is optional: any first run of four digits is taken as-is, and
    # a citation without one simply yields the bare word.
    year_match = re.search(r"(\d{4})", citation)
    year = year_match.group(1) if year_match is not None else ""
    return f"{author_match.group(1)}{year}"

0 commit comments

Comments
 (0)