@@ -37,17 +37,36 @@ def cleanup_metadata(metadata):
3737 for row in reader :
3838 licenses [row ["props__url" ]] = row ["id" ]
3939 rights = []
40- if "rights" in "metadata" :
40+ files = None
41+ if "rights" in metadata ["metadata" ]:
4142 for f in metadata ["metadata" ]["rights" ]:
4243 link = f ["link" ]
4344 if link in licenses :
4445 f ["id" ] = licenses [link ]
4546 else :
4647 f ["title" ]["en" ] = "Unknown"
47- if f ["description" ]["en" ] == "vor" :
48- rights .append (f )
48+ # Not supporting file download till v12
49+ # if f["description"]["en"] == "vor":
50+ # rights.append(f)
51+ # if f["id"] == 'cc-by-4.0':
52+ # doi = metadata["pids"]["doi"]["identifier"]
53+ # response = requests.get('https://api.crossref.org/works/' + doi)
54+ # if response.status_code == 200:
55+ # data = response.json()
56+ # try:
57+ # links = data["message"]["link"]
58+ # for link in links:
59+ # if link["content-type"] == "application/pdf":
60+ # link = link["URL"]
61+ # requests.get(link)
62+ # fname = f"{doi.replace('/','_')}.pdf"
63+ # with open(fname, "wb") as f:
64+ # f.write(response.content)
65+ # files = fname
66+ # except:
67+ # pass
4968 metadata ["metadata" ]["rights" ] = rights
50- return metadata
69+ return metadata , files
5170
5271
5372def get_orcid_works (orcid ):
@@ -152,7 +171,6 @@ def read_outputs():
152171
153172
154173if __name__ == "__main__" :
155-
156174 parser = argparse .ArgumentParser (
157175 description = "Harvest DOIs from Crossref or ORCID and add to CaltechAUTHORS"
158176 )
@@ -161,6 +179,9 @@ def read_outputs():
161179 parser .add_argument ("-doi" , help = "DOI to harvest" )
162180 parser .add_argument ("-actor" , help = "Name of actor to use for review message" )
163181 parser .add_argument ("-report" , help = "Generate a report only" , action = "store_true" )
182+ parser .add_argument (
183+ "-print" , help = "Print out DOIs (no harvesting)" , action = "store_true"
184+ )
164185 args = parser .parse_args ()
165186
166187 harvest_type = args .harvest_type
@@ -176,15 +197,25 @@ def read_outputs():
176197 if harvest_type == "crossref" :
177198 dois = get_crossref_ror ()
178199 review_message = (
179- "Added by Tom during testing, should be a valid article from WOS harvest"
180- #"Automatically added from Crossref based on Caltech ROR affiliation"
200+ "Automatically added from Crossref based on Caltech ROR affiliation"
181201 )
182- dois = ['10.1051/0004-6361/202346526' ,'10.1016/j.palaeo.2023.111756' ]
202+ if args .print :
203+ ostring = "dois="
204+ for doi in dois :
205+ ostring += f" { doi } "
206+ print (ostring )
207+ dois = []
183208 elif harvest_type == "orcid" :
184209 dois = get_orcid_works (args .orcid )
185210 review_message = (
186211 f"Automatically added from ORCID from record { args .orcid } by { args .actor } "
187212 )
213+ if args .print :
214+ ostring = "dois= "
215+ for doi in dois :
216+ ostring += f" { doi } "
217+ print (ostring )
218+ dois = []
188219 elif harvest_type == "doi" :
189220 dois = args .doi .split (" " )
190221 review_message = f"Automatically added by { args .actor } as part of import from DOI list: { args .doi } "
@@ -224,20 +255,22 @@ def read_outputs():
224255 transformed = subprocess .check_output (["doi2rdm" , doi ])
225256 data = transformed .decode ("utf-8" )
226257 data = json .loads (data )
227- data = cleanup_metadata (data )
258+ data , files = cleanup_metadata (data )
228259 response = caltechdata_write (
229260 data ,
230261 token ,
231262 production = True ,
232263 authors = True ,
233264 community = community ,
234265 review_message = review_message ,
266+ files = files ,
235267 )
236- print ("doi=" ,doi )
237- #with open("harvested_dois.txt", "a") as f:
268+ print ("doi=" , doi )
269+ # with open("harvested_dois.txt", "a") as f:
238270 # f.write(doi + "\n")
239- except :
240- print ("error= system error with doi2rdm" )
271+ except Exception as e :
272+ cleaned = e .replace ("'" ,"/" )
273+ print (f"error= system error with doi2rdm { cleaned } " )
241274 else :
242275 print (f"error=DOI { doi } has already been harvested, skipping" )
243276 else :
0 commit comments