|
7 | 7 | description="get_metadata queries the caltechDATA (Invenio 3) API\ |
8 | 8 | and returns DataCite-compatable metadata" |
9 | 9 | ) |
10 | | - parser.add_argument( |
11 | | - "output", |
12 | | - help="Output file name", |
13 | | - ) |
| 10 | + parser.add_argument("output", help="Output file name") |
14 | 11 | parser.add_argument("-keywords", nargs="*") |
15 | 12 |
|
16 | 13 | args = parser.parse_args() |
17 | 14 |
|
18 | | - url = 'https://data.caltech.edu/api/records/?size=5000' |
| 15 | + url = "https://data.caltech.edu/api/records/?size=5000" |
19 | 16 |
|
20 | | - search = '' |
| 17 | + search = "" |
21 | 18 | if args.keywords: |
22 | 19 | for key in args.keywords: |
23 | | - if search == '': |
| 20 | + if search == "": |
24 | 21 | search = f'&q=subjects:"{key}"' |
25 | 22 | else: |
26 | | - search = search+f'+"{key}"' |
| 23 | + search = search + f'+"{key}"' |
27 | 24 | url = url + search |
28 | 25 |
|
29 | 26 | response = requests.get(url) |
30 | 27 | hits = response.json() |
31 | 28 |
|
32 | | - outfile = open(args.output,'w') |
| 29 | + outfile = open(args.output, "w") |
33 | 30 | writer = csv.writer(outfile) |
34 | | - writer.writerow(['wkt','name','year','doi']) |
35 | | - |
36 | | - for h in hits['hits']['hits']: |
37 | | - metadata = decustomize_schema(h['metadata']) |
38 | | - if 'geoLocations' in metadata: |
39 | | - doi = 'https://doi.org/'+metadata['identifier']['identifier'] |
40 | | - title=metadata['titles'][0]['title'].split(':')[0] |
41 | | - geo = metadata['geoLocations'] |
42 | | - year = metadata['publicationYear'] |
| 31 | + writer.writerow(["wkt", "name", "year", "doi"]) |
| 32 | + |
| 33 | + for h in hits["hits"]["hits"]: |
| 34 | + metadata = decustomize_schema(h["metadata"]) |
| 35 | + if "geoLocations" in metadata: |
| 36 | + doi = "https://doi.org/" + metadata["identifier"]["identifier"] |
| 37 | + title = metadata["titles"][0]["title"].split(":")[0] |
| 38 | + geo = metadata["geoLocations"] |
| 39 | + year = metadata["publicationYear"] |
43 | 40 | for g in geo: |
44 | | - if 'geoLocationBox' in g: |
45 | | - box = g['geoLocationBox'] |
| 41 | + if "geoLocationBox" in g: |
| 42 | + box = g["geoLocationBox"] |
46 | 43 | p1 = f"{box['eastBoundLongitude']} {box['northBoundLatitude']}" |
47 | 44 | p2 = f"{box['westBoundLongitude']} {box['northBoundLatitude']}" |
48 | 45 | p3 = f"{box['westBoundLongitude']} {box['southBoundLatitude']}" |
49 | 46 | p4 = f"{box['eastBoundLongitude']} {box['southBoundLatitude']}" |
50 | | - wkt = f'POLYGON (({p1}, {p2}, {p3}, {p4}, {p1}))' |
51 | | - writer.writerow([wkt,title,year,doi]) |
52 | | - |
53 | | - if 'geoLocationPoint' in g: |
54 | | - point = g['geoLocationPoint'] |
55 | | - wkt = f"POINT ({point['pointLongitude']} {point['pointLatitude']})" |
56 | | - writer.writerow([wkt,title,year,doi]) |
| 47 | + wkt = f"POLYGON (({p1}, {p2}, {p3}, {p4}, {p1}))" |
| 48 | + writer.writerow([wkt, title, year, doi]) |
57 | 49 |
|
| 50 | + if "geoLocationPoint" in g: |
| 51 | + point = g["geoLocationPoint"] |
| 52 | + wkt = f"POINT ({point['pointLongitude']} {point['pointLatitude']})" |
| 53 | + writer.writerow([wkt, title, year, doi]) |
0 commit comments