|
| 1 | +import os, json, csv, argparse |
| 2 | +import requests |
| 3 | +from caltechdata_api import decustomize_schema |
| 4 | + |
# Records search endpoint of the caltechDATA API.
_RECORDS_URL = "https://data.caltech.edu/api/records/"


def _build_params(keywords):
    """Return the query-string parameters for the records search.

    Up to 5000 records are always requested.  When *keywords* is non-empty
    a ``q`` parameter is added in which the first keyword is matched against
    the ``subjects`` field and every further keyword is appended as an
    additional quoted phrase: ``subjects:"first" "second" ...``.

    The values are returned unencoded; ``requests`` percent-encodes them,
    so keywords containing ``&``, ``#`` and similar characters cannot
    corrupt the URL.
    """
    params = {"size": 5000}
    if keywords:
        phrases = " ".join(f'"{key}"' for key in keywords)
        params["q"] = f"subjects:{phrases}"
    return params


def _point_rows(metadata):
    """Yield one ``[lat, lon, name]`` row per geoLocationPoint in *metadata*.

    *metadata* is the dictionary returned by ``decustomize_schema``
    (presumably DataCite-style metadata -- confirm against caltechdata_api).
    Geolocation entries without a ``geoLocationPoint`` (for example
    ``geoLocationBox`` entries) are skipped.  The name is the part of the
    record's first title that precedes the first colon.
    """
    for location in metadata.get("geoLocations") or []:
        if "geoLocationPoint" not in location:
            continue
        point = location["geoLocationPoint"]
        # Looked up only once a point exists, so records without points
        # never need a title.
        name = metadata["titles"][0]["title"].split(":")[0]
        yield [point["pointLatitude"], point["pointLongitude"], name]


def main():
    """Query caltechDATA and write every geolocation point to a CSV file.

    Command-line arguments:
        output      Path of the CSV file to write (overwritten if present).
        -keywords   Zero or more keywords used to restrict the search.

    Raises:
        requests.HTTPError: if the API answers with an error status.
    """
    parser = argparse.ArgumentParser(
        description=(
            "Query the caltechDATA (Invenio 3) API and write the latitude, "
            "longitude, and title of every geoLocationPoint to a CSV file"
        )
    )
    parser.add_argument(
        "output",
        help="Output file name",
    )
    parser.add_argument(
        "-keywords",
        nargs="*",
        help="Keywords used to restrict the record search",
    )

    args = parser.parse_args()

    # Fetch before opening the output so a failed request does not leave an
    # empty or truncated file behind.
    response = requests.get(_RECORDS_URL, params=_build_params(args.keywords))
    response.raise_for_status()
    hits = response.json()["hits"]["hits"]

    # newline="" is required by the csv module to avoid blank rows on
    # platforms that translate line endings; UTF-8 keeps non-ASCII titles
    # writable regardless of the platform default encoding.
    with open(args.output, "w", newline="", encoding="utf-8") as outfile:
        writer = csv.writer(outfile)
        writer.writerow(["lat", "lon", "name"])
        for hit in hits:
            metadata = decustomize_schema(hit["metadata"])
            writer.writerows(_point_rows(metadata))


if __name__ == "__main__":
    main()
| 74 | + |
0 commit comments