Skip to content

Commit 8244644

Browse files
authored
Allow author records with just 'death_date' to count as "dated" (internetarchive#11150)
* allow author records with just 'death_date' to count as "dated", fixes internetarchive#10736
* refactor a has_dates() method to simplify logic
* explicitly test death_date only matching for internetarchive#10736
1 parent 100f061 commit 8244644

File tree

3 files changed

+44
-8
lines changed

3 files changed

+44
-8
lines changed

openlibrary/catalog/add_book/load_book.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,9 @@ def find_author(author: dict[str, Any]) -> list["Author"]:
142142
Searches OL for an author by a range of queries.
143143
"""
144144

145+
def has_dates(author: "dict | Author") -> bool:
    """Return True when the record carries a birth or death date key.

    Only the presence of the keys is checked; their values are not inspected.

    :param author: Author import dict or existing Author record to inspect.
    :return: True if 'birth_date' or 'death_date' is present, otherwise False.
    """
    return any(field in author for field in ('birth_date', 'death_date'))
147+
145148
def walk_redirects(obj, seen):
146149
seen.add(obj['key'])
147150
while obj['type']['key'] == '/type/redirect':
@@ -210,17 +213,16 @@ def get_redirected_authors(authors: list["Author"]):
210213
break
211214
match = []
212215
seen = set()
216+
# If author has dates, we only consider dated candidates,
217+
# otherwise only include undated candidates.
213218
for a in things:
214-
key = a['key']
215-
if key in seen:
219+
if key := a['key'] in seen:
216220
continue
217221
seen.add(key)
218-
assert a.type.key == '/type/author'
219-
if 'birth_date' in author and 'birth_date' not in a:
222+
if has_dates(author) != has_dates(a):
220223
continue
221-
if 'birth_date' not in author and 'birth_date' in a:
222-
continue
223-
if not author_dates_match(author, a):
224+
assert a.type.key == '/type/author'
225+
if has_dates(author) and not author_dates_match(author, a):
224226
continue
225227
match.append(a)
226228
if not match:

openlibrary/catalog/utils/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ def author_dates_match(a: dict, b: "dict | Author") -> bool:
5858
Checks if the years of two authors match. Only compares years,
5959
not names or keys. Works by returning False if any year specified in one record
6060
does not match that in the other, otherwise True. If any one author does not have
61-
dates, it will return True.
61+
dates, it will return True (i.e. "possible match").
6262
6363
:param dict a: Author import dict {"name": "Some One", "birth_date": "1960"}
6464
:param dict b: Author import dict {"name": "Some One"}
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
import pytest
2+
3+
from openlibrary.catalog.utils import author_dates_match
4+
5+
# Reference record that every test case below is compared against.
EXISTING = {'birth_date': '1904', 'death_date': '1996'}


# Date sets that author_dates_match() is expected to accept against EXISTING.
MATCH_CASES = [
    {'birth_date': '1904', 'death_date': '1996'},  # both dates agree
    {'death_date': '1996'},  # death date only
    {'birth_date': '1904'},  # birth date only
]


# Date sets that author_dates_match() is expected to reject against EXISTING.
NON_MATCH_CASES = [
    {'birth_date': '1794', 'death_date': '1823'},  # both dates differ
    {'birth_date': '1904', 'death_date': '2005'},  # one date mismatch
]
19+
20+
21+
@pytest.mark.parametrize('a', MATCH_CASES)
def test_author_dates_match_true(a):
    """Every record in MATCH_CASES must be reported as matching EXISTING."""
    is_match = author_dates_match(a, EXISTING)
    assert is_match
24+
25+
26+
@pytest.mark.parametrize('a', NON_MATCH_CASES)
def test_author_dates_match_false(a):
    """Every record in NON_MATCH_CASES must be reported as not matching EXISTING."""
    is_match = author_dates_match(a, EXISTING)
    assert not is_match
29+
30+
31+
def test_author_dates_match_death_only():
    """Two records that each carry only the same death date must match."""
    incoming = {'death_date': '1996'}
    existing = {'death_date': '1996'}
    assert author_dates_match(incoming, existing)

0 commit comments

Comments
 (0)