Skip to content

Commit 186f83a

Browse files
authored
fix article labels (#32)
* Label is actually required
* Update version
1 parent 4d43a7d commit 186f83a

File tree

7 files changed

+106
-86
lines changed

7 files changed

+106
-86
lines changed

src/bibx/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@
2525
"read_wos",
2626
]
2727

28-
__version__ = "0.3.0"
28+
__version__ = "0.3.1"
2929

3030

3131
def query_openalex(query: str, limit: int = 600) -> Collection:

src/bibx/_entities/article.py

Lines changed: 19 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -9,8 +9,13 @@ def _keep(a: T, b: T) -> T:
99
return a if a is not None else b
1010

1111

12+
def _keep_longest(a: str, b: str) -> str:
13+
return a if len(a) > len(b) else b
14+
15+
1216
@dataclass
1317
class Article:
18+
label: str
1419
ids: set[str]
1520
authors: list[str] = field(default_factory=list)
1621
year: Optional[int] = None
@@ -20,7 +25,6 @@ class Article:
2025
issue: Optional[str] = None
2126
page: Optional[str] = None
2227
doi: Optional[str] = None
23-
_label: Optional[str] = None
2428
_permalink: Optional[str] = None
2529
times_cited: Optional[int] = None
2630
references: list["Article"] = field(default_factory=list)
@@ -31,6 +35,7 @@ class Article:
3135
def merge(self, other: "Article") -> "Article":
3236
"""Merge two articles into a new one."""
3337
return Article(
38+
label=_keep_longest(self.label, other.label),
3439
ids=self.ids.union(other.ids),
3540
authors=self.authors if self.authors else other.authors,
3641
year=_keep(self.year, other.year),
@@ -40,7 +45,6 @@ def merge(self, other: "Article") -> "Article":
4045
issue=_keep(self.issue, other.issue),
4146
page=_keep(self.page, other.page),
4247
doi=_keep(self.doi, other.doi),
43-
_label=_keep(self._label, other._label),
4448
_permalink=_keep(self._permalink, other._permalink),
4549
times_cited=_keep(self.times_cited, other.times_cited),
4650
references=self.references or other.references,
@@ -54,17 +58,17 @@ def key(self) -> str:
5458
return next(iter(sorted(self.ids)))
5559

5660
@property
57-
def label(self) -> str:
58-
if self._label is not None:
59-
return self._label
61+
def simple_label(self) -> Optional[str]:
6062
pieces = {
61-
"AU": self.authors[0].replace(",", "") if self.authors else "anonymous",
63+
"AU": self.authors[0].replace(",", "") if self.authors else None,
6264
"PY": str(self.year) if self.year else None,
6365
"J9": str(self.journal) if self.journal else None,
6466
"VL": f"V{self.volume}" if self.volume else None,
6567
"BP": f"P{self.page}" if self.page else None,
6668
"DI": f"DOI {self.doi}" if self.doi else None,
6769
}
70+
if not any(pieces.values()):
71+
return None
6872
return ", ".join(value for value in pieces.values() if value)
6973

7074
@property
@@ -85,10 +89,17 @@ def simple_id(self) -> Optional[str]:
8589
def __repr__(self) -> str:
8690
return f"Article(ids={self.ids!r}, authors={self.authors!r})"
8791

88-
def add_simple_id(self) -> None:
92+
def add_simple_id(self) -> "Article":
8993
if self.simple_id is None:
90-
return
94+
return self
9195
self.ids.add(f"simple:{self.simple_id}")
96+
return self
97+
98+
def set_simple_label(self) -> "Article":
99+
if self.simple_label is None:
100+
return self
101+
self.label = self.simple_label
102+
return self
92103

93104
def info(
94105
self,

src/bibx/_entities/collection_builders/openalex.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -75,6 +75,7 @@ def _extract_doi(url: str) -> str:
7575
@classmethod
7676
def _work_to_article(cls, work: Work) -> Article:
7777
article = Article(
78+
label=work.id,
7879
ids={
7980
f"{source}:{id_}"
8081
if source != "doi"
@@ -93,7 +94,6 @@ def _work_to_article(cls, work: Work) -> Article:
9394
issue=work.biblio.issue,
9495
page=work.biblio.first_page,
9596
doi=cls._extract_doi(work.doi) if work.doi else None,
96-
_label=work.id,
9797
_permalink=work.primary_location and work.primary_location.landing_page_url,
9898
times_cited=work.cited_by_count,
9999
references=[cls._reference_to_article(r) for r in work.referenced_works],
@@ -108,6 +108,7 @@ def _work_to_article(cls, work: Work) -> Article:
108108
@staticmethod
109109
def _reference_to_article(reference: str) -> Article:
110110
return Article(
111+
label=reference,
111112
ids={f"openalex:{reference}"},
112113
_permalink=reference,
113114
sources={"openalex"},

src/bibx/_entities/collection_builders/scopus_bib.py

Lines changed: 25 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -41,24 +41,29 @@ def _article_from_entry(self, entry: dict) -> Article:
4141
doi = entry.get("doi")
4242
if doi is not None:
4343
ids.add(f"doi:{doi}")
44-
article = Article(
45-
ids=ids,
46-
authors=entry["author"].split(" and "),
47-
year=int(entry["year"]),
48-
title=entry.get("title"),
49-
journal=entry.get("journal"),
50-
volume=entry.get("volume"),
51-
issue=entry.get("issue"),
52-
page=entry.get("art_number"),
53-
doi=entry.get("doi"),
54-
references=list(self._articles_from_references(entry.get("references"))),
55-
keywords=entry.get("keywords", "").split("; "),
56-
extra=entry,
57-
sources={json.dumps(entry)},
58-
times_cited=times_cited,
44+
return (
45+
Article(
46+
label=doi or entry.get("title", "replaceme"),
47+
ids=ids,
48+
authors=entry["author"].split(" and "),
49+
year=int(entry["year"]),
50+
title=entry.get("title"),
51+
journal=entry.get("journal"),
52+
volume=entry.get("volume"),
53+
issue=entry.get("issue"),
54+
page=entry.get("art_number"),
55+
doi=entry.get("doi"),
56+
references=list(
57+
self._articles_from_references(entry.get("references"))
58+
),
59+
keywords=entry.get("keywords", "").split("; "),
60+
extra=entry,
61+
sources={json.dumps(entry)},
62+
times_cited=times_cited,
63+
)
64+
.add_simple_id()
65+
.set_simple_label()
5966
)
60-
article.add_simple_id()
61-
return article
6267

6368
def _articles_from_references(self, references: Optional[str]) -> Iterable[Article]:
6469
if references is None:
@@ -76,13 +81,11 @@ def _article_from_reference(reference: str) -> Article:
7681
author = reference.split(",", maxsplit=2)[0].strip()
7782
match = re.search(r"(10.\d{4,9}/[-._;()/:A-Z0-9]+)", reference)
7883
doi = match.groups()[0] if match else None
79-
article = Article(
84+
return Article(
85+
label=reference,
8086
ids=set() if doi is None else {f"doi:{doi}"},
8187
authors=[author],
8288
year=year,
83-
_label=reference,
8489
doi=doi,
8590
sources={reference},
86-
)
87-
article.add_simple_id()
88-
return article
91+
).add_simple_id()

src/bibx/_entities/collection_builders/scopus_ris.py

Lines changed: 25 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -102,7 +102,8 @@ def _article_form_reference(cls, scopusref: str) -> Article:
102102
doi, _ = cls._find_doi(scopusref)
103103
if not authors or not year:
104104
raise MissingCriticalInformationError()
105-
article = Article(
105+
return Article(
106+
label=scopusref,
106107
ids=set() if doi is None else {f"doi:{doi}"},
107108
authors=[f"{first_name} {last_name.replace(' ', '').replace('.', '')}"],
108109
year=int(year),
@@ -114,9 +115,7 @@ def _article_form_reference(cls, scopusref: str) -> Article:
114115
volume=volume_info.get("volume"),
115116
page=volume_info.get("page"),
116117
doi=doi,
117-
)
118-
article.add_simple_id()
119-
return article
118+
).add_simple_id()
120119

121120
@classmethod
122121
def _parse_references(cls, refs: list[str]) -> list[Article]:
@@ -134,7 +133,6 @@ def _parse_references(cls, refs: list[str]) -> list[Article]:
134133
def _ris_to_dict(record: str) -> dict[str, list[str]]:
135134
parsed = defaultdict(list)
136135
current = None
137-
138136
for line in record.split("\n"):
139137
match = _RIS_PATTERN.match(line)
140138
if not match:
@@ -163,25 +161,29 @@ def _article_from_record(cls, record: str) -> Article:
163161
authors = data.get("AU", [])
164162
if not authors or not year:
165163
raise MissingCriticalInformationError()
166-
doi = data.get("DO")
167-
article = Article(
168-
ids=set() if doi is None else {f"doi:{doi}"},
169-
title=_joined(data.get("TI")),
170-
authors=authors,
171-
year=year,
172-
journal=_joined(data.get("J2")),
173-
volume=_joined(data.get("VL")),
174-
issue=_joined(data.get("IS")),
175-
page=_joined(data.get("SP")),
176-
doi=_joined(data.get("DO")),
177-
keywords=data.get("KW", []),
178-
references=cls._parse_references(data.get("N1:References", [])),
179-
sources={"scopus"},
180-
extra=data,
181-
times_cited=times_cited,
164+
doi_list = data.get("DO")
165+
doi = doi_list[0] if doi_list else None
166+
return (
167+
Article(
168+
label=doi or "replaceme",
169+
ids=set() if doi is None else {f"doi:{doi}"},
170+
title=_joined(data.get("TI")),
171+
authors=authors,
172+
year=year,
173+
journal=_joined(data.get("J2")),
174+
volume=_joined(data.get("VL")),
175+
issue=_joined(data.get("IS")),
176+
page=_joined(data.get("SP")),
177+
doi=doi,
178+
keywords=data.get("KW", []),
179+
references=cls._parse_references(data.get("N1:References", [])),
180+
sources={"scopus"},
181+
extra=data,
182+
times_cited=times_cited,
183+
)
184+
.add_simple_id()
185+
.set_simple_label()
182186
)
183-
article.add_simple_id()
184-
return article
185187

186188
@classmethod
187189
def _parse_file(cls, file: TextIO) -> Iterable[Article]:

src/bibx/_entities/collection_builders/wos.py

Lines changed: 23 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -297,26 +297,29 @@ def _parse_article_from_str(cls, article_as_str: str) -> Article:
297297
article_data[field].append(parsed["value"])
298298
processed = cls._parse_all(dict(article_data))
299299
doi = processed.get("DOI")
300-
article = Article(
301-
ids=set() if doi is None else {f"doi:{doi}"},
302-
authors=processed.get("authors", []),
303-
year=processed.get("year"),
304-
title=processed.get("title"),
305-
journal=processed.get("source_abbreviation"),
306-
volume=processed.get("volume"),
307-
issue=processed.get("issue"),
308-
page=processed.get("beginning_page"),
309-
doi=doi,
310-
times_cited=processed.get("times_cited"),
311-
references=list(
312-
cls._get_articles_from_references(processed.get("references"))
313-
),
314-
keywords=processed.get("keywords", []),
315-
extra=processed,
316-
sources={article_as_str},
300+
return (
301+
Article(
302+
label=doi or "replaceme",
303+
ids=set() if doi is None else {f"doi:{doi}"},
304+
authors=processed.get("authors", []),
305+
year=processed.get("year"),
306+
title=processed.get("title"),
307+
journal=processed.get("source_abbreviation"),
308+
volume=processed.get("volume"),
309+
issue=processed.get("issue"),
310+
page=processed.get("beginning_page"),
311+
doi=doi,
312+
times_cited=processed.get("times_cited"),
313+
references=list(
314+
cls._get_articles_from_references(processed.get("references"))
315+
),
316+
keywords=processed.get("keywords", []),
317+
extra=processed,
318+
sources={article_as_str},
319+
)
320+
.add_simple_id()
321+
.set_simple_label()
317322
)
318-
article.add_simple_id()
319-
return article
320323

321324
@classmethod
322325
def _parse_reference_from_str(cls, reference: str) -> Article:
@@ -327,8 +330,8 @@ def _parse_reference_from_str(cls, reference: str) -> Article:
327330
processed = cls._parse_all(data)
328331
doi = processed.get("DOI")
329332
article = Article(
333+
label=reference,
330334
ids=set() if doi is None else {f"doi:{doi}"},
331-
_label=reference,
332335
title=processed.get("title"),
333336
authors=processed.get("authors", []),
334337
# FIXME: Year is required here

0 commit comments

Comments
 (0)