|
6 | 6 | import textwrap
|
7 | 7 | from collections.abc import AsyncIterable, Sequence
|
8 | 8 | from copy import deepcopy
|
| 9 | +from datetime import datetime, timedelta |
9 | 10 | from io import BytesIO
|
10 | 11 | from pathlib import Path
|
11 | 12 | from typing import cast
|
@@ -1239,6 +1240,74 @@ def test_dois_resolve_to_correct_journals(doi_journals):
|
1239 | 1240 | assert details.journal == doi_journals["journal"]
|
1240 | 1241 |
|
1241 | 1242 |
|
def test_docdetails_merge_with_non_list_fields() -> None:
    """Merge two DocDetails whose source metadata mixes scalars and lists.

    Both documents share citation, docname and dockey; the second one is
    dated 13 weeks after the first. Each entry in ``other`` is a bare string
    on one side and a list on the other, and the merged result must still
    contain every source value from both sides.
    """
    first_published = datetime(2023, 1, 1)

    # Original publication: scalar bibtex_source, list client_source.
    original = DocDetails(
        citation="Citation 1",
        publication_date=first_published,
        docname="Document 1",
        dockey="key1",
        other={"bibtex_source": "source1", "client_source": ["client1"]},
    )

    # Republication: the shapes are flipped relative to the original
    # (list bibtex_source, scalar client_source).
    republished = DocDetails(
        citation=original.citation,
        publication_date=first_published + timedelta(weeks=13),
        docname=original.docname,
        dockey=original.dockey,
        other={"bibtex_source": ["source2"], "client_source": "client2"},
    )

    combined = original + republished

    merged_bibtex = combined.other["bibtex_source"]
    assert {"source1", "source2"}.issubset(
        merged_bibtex
    ), "Expected merge to keep both bibtex sources"

    merged_clients = combined.other["client_source"]
    assert {"client1", "client2"}.issubset(
        merged_clients
    ), "Expected merge to keep both client sources"

    assert isinstance(combined, DocDetails), "Merged doc should also be DocDetails"
| 1275 | + |
| 1276 | + |
def test_docdetails_merge_with_list_fields() -> None:
    """Merge two DocDetails whose source metadata are lists on both sides.

    Both documents share citation, docname and dockey; the second one is
    dated 13 weeks after the first. Every entry in ``other`` is a
    single-element list, and the merged result must still contain every
    source value from both sides.
    """
    first_published = datetime(2023, 1, 1)

    # Original publication: list bibtex_source and list client_source.
    original = DocDetails(
        citation="Citation 1",
        publication_date=first_published,
        docname="Document 1",
        dockey="key1",
        other={"bibtex_source": ["source1"], "client_source": ["client1"]},
    )

    # Republication: same list shapes, different source values.
    republished = DocDetails(
        citation=original.citation,
        publication_date=first_published + timedelta(weeks=13),
        docname=original.docname,
        dockey=original.dockey,
        other={"bibtex_source": ["source2"], "client_source": ["client2"]},
    )

    combined = original + republished

    merged_bibtex = combined.other["bibtex_source"]
    assert {"source1", "source2"}.issubset(
        merged_bibtex
    ), "Expected merge to keep both bibtex sources"

    merged_clients = combined.other["client_source"]
    assert {"client1", "client2"}.issubset(
        merged_clients
    ), "Expected merge to keep both client sources"

    assert isinstance(combined, DocDetails), "Merged doc should also be DocDetails"
| 1309 | + |
| 1310 | + |
1242 | 1311 | @pytest.mark.vcr
|
1243 | 1312 | @pytest.mark.parametrize("use_partition", [True, False])
|
1244 | 1313 | @pytest.mark.asyncio
|
|
0 commit comments