|
1 | 1 | from waybackprov import get_collection, get_crawls, get_depth, deepest_collection, cdx |
2 | 2 |
|
| 3 | + |
def test_coll():
    """Looking up the UMD Archive-It collection returns its title metadata."""
    metadata = get_collection("ArchiveIt-Collection-2410")
    assert metadata["title"] == "University of Maryland"
6 | 8 |
|
def test_get_crawls():
    """Crawl records for mith.umd.edu exist and carry the expected fields."""
    crawls = list(get_crawls("https://mith.umd.edu"))
    assert len(crawls) > 0
    first = crawls[0]
    # every crawl record must have a truthy value for each of these keys
    for field in ("timestamp", "url", "status", "collections"):
        assert first[field]
    assert len(first["collections"]) > 0
| 17 | + |
15 | 18 |
|
def test_depth():
    """Known collections report their documented nesting depth."""
    expected_depths = {
        "ArchiveIt-Collection-2410": 4,
        "wikipediaoutlinks00003": 3,
    }
    for coll_id, depth in expected_depths.items():
        assert get_depth(coll_id) == depth
19 | 23 |
|
def test_deepest_collection():
    """deepest_collection picks the most deeply nested collection id."""
    candidates = [
        "ArchiveIt-Partner-408",
        "archiveitdigitalcollection",
        "web",
        "archiveitpartners",
        "ArchiveIt-Collection-2410",
    ]
    deepest = deepest_collection(candidates)
    assert deepest == "ArchiveIt-Collection-2410"
| 33 | + |
29 | 34 |
|
def test_loop():
    # Weirdly, some collections can contain themselves when there is a loop,
    # e.g. coll1 ∃ coll2 and coll2 ∃ coll1 — depth must still terminate.
    depth = get_depth("ArchiveIt-Partner-1140")
    assert depth == 3
34 | 40 |
|
def test_prefix():
    """Prefix queries filtered by a status-id regex yield crawl records.

    The regex is a raw string so the digit class is not interpreted as an
    (invalid) string escape sequence, which raises a SyntaxWarning on
    modern Python.
    """
    crawls = get_crawls(
        "https://twitter.com/Guccifer_2", prefix=True, match=r"/status/\d+$"
    )
    crawl = next(crawls)
    assert crawl["url"]
39 | 48 |
|
def test_cdx():
    """cdx() returns the known count of matching URLs for 2016-2018."""
    urls = cdx(
        "https://twitter.com/Guccifer_2",
        match=r"/status/\d+$",  # raw string: avoid invalid \d escape sequence
        start_year=2016,
        end_year=2018,
    )
    assert len(list(urls)) == 132
43 | 57 |
|
| 58 | + |
def test_missing():
    """A URL the Wayback Machine never crawled yields no crawl records."""
    missing_url = "https://twitter.com/slavresistance/status/1016697918970105857/"
    crawls = list(get_crawls(missing_url))
    assert len(crawls) == 0
0 commit comments