Skip to content

Commit b10b8e9

Browse files
authored
fix: conform to the new scopus format (#48)
* fix: conform to the new Scopus format

  Scopus changed its export format: it no longer provides the Abbreviated Source Title, and it changed how authors are formatted. We now use the Source title field instead. Authors now appear to come in `initials, last name, first name` format, so we re-format them as `last name, initials`, as they were before.

* prepare release

* fix typo
1 parent abeade5 commit b10b8e9

File tree

3 files changed

+26
-3
lines changed

3 files changed

+26
-3
lines changed

src/bibx/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
"read_wos",
2929
]
3030

31-
__version__ = "0.8.0"
31+
__version__ = "0.9.0"
3232

3333

3434
def query_openalex(

src/bibx/cli.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,14 @@ def openalex(
109109
rprint(graph)
110110

111111

112+
@app.command()
def csv(filename: str) -> None:
    """Parse a Scopus CSV file and print its citation pairs.

    Args:
        filename: Path of the Scopus CSV export to read.
    """
    # Pin the text encoding instead of inheriting the platform's locale
    # default, so the same export decodes identically on every OS.
    # NOTE(review): assumes Scopus exports are UTF-8 encoded -- confirm.
    with open(filename, encoding="utf-8") as source:
        collection = read_scopus_csv(source)
        # Printed while the file is still open in case the collection
        # reads from it lazily.
        rprint(list(collection.citation_pairs))
118+
119+
112120
def main() -> None:
    """Run the command-line interface by invoking ``app``."""
    app()

src/bibx/sources/scopus_csv.py

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313

1414
from .base import Source
1515

16+
_NUM_AUTHOR_PARTS = 3
17+
1618
logger = logging.getLogger(__name__)
1719

1820

@@ -24,6 +26,19 @@ def _split_str(value: str | None) -> list[str]:
2426
return value.strip().split("; ") if value else []
2527

2628

29+
def _rotate_authors(authors: list[str]) -> list[str]:
    """Rewrite each author entry as ``last name, initials``.

    Every entry is split on ``", "``. An entry with exactly
    ``_NUM_AUTHOR_PARTS`` pieces is read as
    ``initials, last name, first name``; the first name is discarded and
    the remaining two pieces are swapped. Any other entry is reported at
    debug level and passed through unchanged.

    Args:
        authors: Author strings to convert.

    Returns:
        A new list with one entry per input author, in the same order.
    """
    rotated: list[str] = []
    for entry in authors:
        pieces = entry.split(", ")
        if len(pieces) == _NUM_AUTHOR_PARTS:
            initials, last_name, _first_name = pieces
            rotated.append(f"{last_name}, {initials}")
        else:
            # Keep unrecognised entries rather than dropping them.
            logger.debug("unexpected author format: %s", entry)
            rotated.append(entry)
    return rotated
40+
41+
2742
class Row(BaseModel):
2843
"""Row model for Scopus CSV data."""
2944

@@ -34,7 +49,7 @@ class Row(BaseModel):
3449
]
3550
year: Annotated[int, Field(validation_alias="Year")]
3651
title: Annotated[str, Field(validation_alias="Title")]
37-
journal: Annotated[str, Field(validation_alias="Abbreviated Source Title")]
52+
journal: Annotated[str, Field(validation_alias="Source title")]
3853
volume: Annotated[
3954
str | None,
4055
Field(validation_alias="Volume"),
@@ -110,7 +125,7 @@ def _parse_file(self, file: TextIO) -> Generator[Article, None, None]:
110125
label="",
111126
ids=set(),
112127
title=datum.title,
113-
authors=datum.authors,
128+
authors=_rotate_authors(datum.authors),
114129
year=datum.year,
115130
journal=datum.journal,
116131
volume=datum.volume,

0 commit comments

Comments
 (0)