34
34
from renku .core .dataset .providers .repository import RepositoryImporter , make_request
35
35
from renku .core .util import communication
36
36
from renku .core .util .doi import is_doi
37
+ from renku .core .util .requests import get_redirect_url
37
38
from renku .core .util .urls import remove_credentials
38
39
from renku .domain_model .project_context import project_context
39
40
@@ -80,7 +81,9 @@ def supports(uri):
80
81
@staticmethod
81
82
def get_record_id(uri):
    """Extract record id from URI.

    The record id is taken to be the last path segment that consists only of digits, so URIs with trailing
    components (e.g. ``/records/<id>/files`` or a trailing slash) still resolve to the id.

    Args:
        uri: URI of a record.

    Returns:
        The last all-digit path segment; if the path has no such segment, the last path segment as-is.
    """
    segments = urlparse(uri).path.split("/")
    numeric_segments = [segment for segment in segments if segment.isdigit()]
    if numeric_segments:
        return numeric_segments[-1]

    # NOTE: No numeric segment found; fall back to the plain "last segment" rule instead of raising ``IndexError``
    return segments[-1]
84
87
85
88
@staticmethod
86
89
def get_export_parameters () -> List ["ProviderParameter" ]:
@@ -121,7 +124,7 @@ def __init__(self, *, uri: str, original_uri, json: Dict[str, Any]):
121
124
122
125
metadata = self ._json .pop ("metadata" , {})
123
126
self ._json ["metadata" ] = ZenodoMetadataSerializer .from_metadata (metadata ) if metadata is not None else None
124
- record_id = self ._json .pop ("record_id" , None )
127
+ record_id = self ._json .pop ("record_id" , None ) or self . _json . pop ( "recid" , None )
125
128
self ._json ["record_id" ] = str (record_id ) if record_id is not None else None
126
129
127
130
# NOTE: Make sure that these properties have a default value
@@ -136,11 +139,11 @@ def version(self):
136
139
@property
137
140
def latest_uri(self):
    """Get URI of latest version.

    Returns:
        The result of passing the record's ``latest`` link to ``get_redirect_url``, or ``None`` when the record
        has no ``latest`` link.
    """
    latest_link = self._json["links"].get("latest")
    if not latest_link:
        # NOTE: Nothing to resolve; don't hand a missing URL to the HTTP helper
        return None

    return get_redirect_url(latest_link)
140
143
141
144
def is_latest_version(self):
    """Check if this record is the latest version.

    Compares the record id extracted from ``latest_uri`` with this record's own ``record_id``.
    """
    newest_record_id = ZenodoProvider.get_record_id(self.latest_uri)
    own_record_id = self._json["record_id"]
    return newest_record_id == own_record_id
144
147
145
148
def get_jsonld (self ):
146
149
"""Get record metadata as jsonld."""
@@ -173,18 +176,19 @@ def fetch_provider_dataset(self) -> "ProviderDataset":
173
176
from renku .domain_model .dataset import Url , generate_default_slug
174
177
175
178
class ZenodoDatasetSchema (ProviderDatasetSchema ):
176
- """Schema for Dataverse datasets."""
179
+ """Schema for Zenodo datasets."""
177
180
178
181
@pre_load
179
182
def fix_data (self , data , ** kwargs ):
180
- """Fix data that is received from Dataverse ."""
183
+ """Fix data that is received from Zenodo ."""
181
184
# Fix context
182
185
context = data .get ("@context" )
183
186
if context and isinstance (context , str ):
187
+ if not context .endswith ("/" ):
188
+ context = f"{ context } /"
184
189
if context == "https://schema.org/" :
185
190
context = "http://schema.org/"
186
191
data ["@context" ] = {"@base" : context , "@vocab" : context }
187
-
188
192
# Add type to creators
189
193
creators = data .get ("creator" , [])
190
194
for c in creators :
@@ -194,6 +198,10 @@ def fix_data(self, data, **kwargs):
194
198
license = data .get ("license" )
195
199
if license and isinstance (license , dict ):
196
200
data ["license" ] = license .get ("url" , "" )
201
+ # fix keywords to be a list
202
+ keywords = data .get ("keywords" )
203
+ if keywords and isinstance (keywords , str ):
204
+ data ["keywords" ] = [k .strip () for k in keywords .split ("," )]
197
205
198
206
# Delete existing isPartOf
199
207
data .pop ("isPartOf" , None )
@@ -228,17 +236,17 @@ def fix_data(self, data, **kwargs):
228
236
class ZenodoFileSerializer :
229
237
"""Zenodo record file."""
230
238
231
def __init__(self, *, id=None, checksum=None, links=None, key=None, size=None, **kwargs):
    """Create a file entry from the fields of a Zenodo file record.

    Args:
        id: Identifier of the file.
        checksum: Checksum of the file.
        links: Mapping of link names to URLs for the file.
        key: Name of the file.
        size: Size of the file.
        **kwargs: Any other fields of the file record. ``filename`` and ``filesize`` are used as fallbacks when
            ``key``/``size`` are not given; all other fields are ignored.
    """
    self.id = id
    self.checksum = checksum
    self.links = links
    # NOTE: Keep accepting the previous keyword names so that older payloads/callers don't silently lose the values
    self.filename = key if key is not None else kwargs.get("filename")
    self.filesize = size if size is not None else kwargs.get("filesize")
237
245
238
246
@property
239
247
def remote_url(self):
    """Get the file's ``self`` link as a ``urllib.parse.ParseResult``."""
    self_link = self.links["self"]
    return urllib.parse.urlparse(self_link)
242
250
243
251
@property
244
252
def type (self ):
@@ -325,7 +333,10 @@ def from_metadata(cls, metadata: Dict[str, Any]) -> "ZenodoMetadataSerializer":
325
333
class ZenodoExporter (ExporterApi ):
326
334
"""Zenodo export manager."""
327
335
328
- HEADERS = {"Content-Type" : "application/json" }
336
+ HEADERS = {
337
+ "Content-Type" : "application/json" ,
338
+ "Referer" : f"https://{ os .environ .get ('RENKU_DOMAIN' , 'zenodo.org' )} " ,
339
+ }
329
340
330
341
def __init__ (self , dataset , publish , tag ):
331
342
super ().__init__ (dataset )
@@ -503,7 +514,9 @@ def publish_deposition(self):
503
514
"""Publish existing deposition."""
504
515
from renku .core .util import requests
505
516
506
- response = requests .post (url = self .publish_url , params = self .exporter .default_params )
517
+ response = requests .post (
518
+ url = self .publish_url , params = self .exporter .default_params , headers = self .exporter .HEADERS
519
+ )
507
520
self ._check_response (response )
508
521
509
522
return response
@@ -517,14 +530,21 @@ def _check_response(response):
517
530
except errors .RequestError :
518
531
if response .status_code == 400 :
519
532
err_response = response .json ()
520
if "errors" in err_response:
    # One line per failing field. Entries carry a ``messages`` list; fall back to a single ``message`` string so
    # that a differently shaped entry doesn't raise ``KeyError`` while reporting the actual error.
    messages = [
        '"{}" failed with "{}"'.format(err["field"], ", ".join(err.get("messages") or [err.get("message", "")]))
        for err in err_response["errors"]
    ]
elif "message" in err_response:
    messages = [err_response["message"]]
else:
    # NOTE: ``text`` is a property of the response, not a method; calling it raises ``TypeError``
    messages = [response.text]
523
542
524
543
raise errors .ExportError (
525
544
"\n " + "\n " .join (messages ) + "\n See `renku dataset edit -h` for details on how to edit" " metadata"
526
545
)
527
546
else :
547
# NOTE: No debugging ``print`` here; the failure is reported through the raised error only
raise errors.ExportError(response.content)
529
549
530
550
0 commit comments