11import os
2+ import json
23import StringIO
34from zipfile import ZipFile
45
56from lxml import etree
67import requests
78
89from exceptions import (
9- NoContainerError , OperationFailedError ,
10- ConnectionError , MetadataNotFoundError , VersionJsonNotFoundError
10+ NoContainerError , OperationFailedError , UnpublishedDataverseError ,
11+ ConnectionError , MetadataNotFoundError , VersionJsonNotFoundError ,
1112)
1213from file import DataverseFile
1314from settings import SWORD_BOOTSTRAP
@@ -30,7 +31,7 @@ def __init__(self, entry=SWORD_BOOTSTRAP, dataverse=None, edit_uri=None,
3031
3132 self ._entry = etree .XML (entry ) if isinstance (entry , str ) else entry
3233 self ._statement = None
33- self ._json = {}
34+ self ._metadata = {}
3435 self ._id = None
3536
3637 # Updates sword entry from keyword arguments
@@ -169,41 +170,81 @@ def get_state(self, refresh=False):
169170 attribute_value = 'latestVersionState'
170171 ).text
171172
def get_metadata(self, version='latest', refresh=False):
    """Return the JSON metadata for one version of this dataset.

    Results are cached per version label in ``self._metadata``. The server
    is contacted only when nothing truthy is cached for ``version`` or when
    ``refresh`` is true.

    :param str version: version label, interpolated after a ``:`` into the
        request URL (labels referenced in this method are ``latest``,
        ``latest-published`` and ``draft``)
    :param bool refresh: when true, ignore the cache and fetch again
    :return: the ``data`` member of the JSON response
    :raises NoContainerError: if ``self.dataverse`` is falsy
    :raises VersionJsonNotFoundError: if the server answers 404
    :raises ConnectionError: if the server answers any other non-200 status
    """
    if not refresh:
        cached = self._metadata.get(version)
        if cached:
            return cached

    if not self.dataverse:
        raise NoContainerError('This dataset has not been added to a Dataverse.')

    url = 'https://{0}/api/datasets/{1}/versions/:{2}'.format(
        self.connection.host,
        self.id,
        version,
    )

    resp = requests.get(url, params={'key': self.connection.token})

    if resp.status_code == 404:
        raise VersionJsonNotFoundError('JSON metadata could not be found for this version.')
    elif resp.status_code != 200:
        raise ConnectionError('JSON metadata could not be retrieved.')

    metadata = resp.json()['data']
    self._metadata[version] = metadata

    # 'latest' is an alias for either the published version or the draft,
    # so store the same payload under the concrete label as well.
    if version == 'latest':
        # .get() mirrors create_draft: a payload without 'versionState' is
        # treated as not released instead of raising KeyError.
        released = metadata.get('versionState') == 'RELEASED'
        self._metadata['latest-published' if released else 'draft'] = metadata

    return metadata
202+
def update_metadata(self, metadata):
    """Replace the metadata of this dataset's draft version.

    Serializes ``metadata`` to JSON and PUTs it to the dataset's ``:draft``
    version endpoint, then caches the ``data`` member of the response under
    both the ``draft`` and ``latest`` labels. Per the existing notes on this
    method, a draft version is created if none exists.

    :param dict metadata: version json, e.g. as returned by `get_metadata`
    :raises OperationFailedError: if the response status is not 200
    """
    endpoint = 'https://{0}/api/datasets/{1}/versions/:draft'.format(
        self.connection.host,
        self.id,
    )
    payload = json.dumps(metadata)

    resp = requests.put(
        endpoint,
        headers={'Content-type': 'application/json'},
        data=payload,
        params={'key': self.connection.token},
    )
    if resp.status_code != 200:
        raise OperationFailedError('JSON metadata could not be updated.')

    # The freshly written draft is also the newest version of the dataset.
    fresh = resp.json()['data']
    for label in ('draft', 'latest'):
        self._metadata[label] = fresh
226+
def create_draft(self):
    """Create a draft version of the dataset without changing its metadata.

    Re-fetches the latest metadata and, only when that version is in the
    ``RELEASED`` state, writes the same metadata back via
    ``update_metadata``. Does nothing further for any other state.
    """
    latest = self.get_metadata(refresh=True)
    if latest.get('versionState') != 'RELEASED':
        return
    self.update_metadata(latest)
194232
def publish(self):
    """Publish this dataset.

    POSTs to ``self.edit_uri`` with ``In-Progress: false`` and an empty
    body. On success, drops the cached ``draft`` metadata and refreshes the
    dataset from the returned receipt.

    :raises NoContainerError: if ``self.dataverse`` is falsy
    :raises UnpublishedDataverseError: if the host dataverse is not published
    :raises OperationFailedError: if the response status is not 200
    """
    # Same guard and message as get_metadata; without it a dataset with no
    # dataverse fails with an AttributeError on `.is_published`.
    if not self.dataverse:
        raise NoContainerError('This dataset has not been added to a Dataverse.')

    if not self.dataverse.is_published:
        raise UnpublishedDataverseError('Host Dataverse must be published.')

    resp = requests.post(
        self.edit_uri,
        # Header values must be strings; Requests rejects an int 0 here.
        headers={'In-Progress': 'false', 'Content-Length': '0'},
        auth=self.connection.auth,
    )

    if resp.status_code != 200:
        raise OperationFailedError('The Dataset could not be published.')

    # The cached draft entry is stale once the request has succeeded.
    self._metadata.pop('draft', None)
    self._refresh(receipt=resp.content)
207248
208249 def get_file (self , file_name , version = 'latest' , refresh = False ):
209250 files = self .get_files (version , refresh )
@@ -215,7 +256,7 @@ def get_file_by_id(self, file_id, version='latest', refresh=False):
215256
216257 def get_files (self , version = 'latest' , refresh = False ):
217258 try :
218- files_json = self .get_json (version , refresh )['files' ]
259+ files_json = self .get_metadata (version , refresh )['files' ]
219260 return [DataverseFile .from_json (self , file_json )
220261 for file_json in files_json ]
221262 except VersionJsonNotFoundError :
@@ -262,7 +303,7 @@ def upload_file(self, filename, content, zip=True):
262303 auth = self .connection .auth ,
263304 )
264305
265- self .get_json (refresh = True )
306+ self .get_metadata (refresh = True )
266307
267308 def delete_file (self , dataverse_file ):
268309 resp = requests .delete (
@@ -273,25 +314,10 @@ def delete_file(self, dataverse_file):
273314 if resp .status_code != 204 :
274315 raise OperationFailedError ('The file could not be deleted.' )
275316
276- self .get_json (refresh = True )
277-
278- def delete_all_files (self ):
279- for f in self .get_files ():
280- self .delete_file (f )
281-
282- # TODO: DANGEROUS! Will delete all unspecified fields! Deposit receipts only give SOME of the fields
283- # Can potentially be replaced with native API functionality
284- # def update_metadata(self):
285- # depositReceipt = self.hostDataverse.connection.sword.update(
286- # dr=self.lastDepositReceipt,
287- # edit_iri=self.editUri,
288- # edit_media_iri=self.editMediaUri,
289- # metadata_entry=self.entry,
290- # )
291- # self._refresh(deposit_receipt=depositReceipt)
292-
293- # if we perform a server operation, we should refresh the dataset object
294- def _refresh (self , receipt = None , published = False ):
317+ self .get_metadata (refresh = True )
318+
319+ # If we perform a server operation, we should refresh the dataset object
320+ def _refresh (self , receipt = None ):
295321 if receipt :
296322 self .edit_uri = get_element (
297323 receipt ,
@@ -311,8 +337,7 @@ def _refresh(self, receipt=None, published=False):
311337 attribute = 'rel' ,
312338 attribute_value = 'http://purl.org/net/sword/terms/statement'
313339 ).get ('href' )
340+
314341 self .get_statement (refresh = True )
315342 self .get_entry (refresh = True )
316-
317- update_version = 'latest-published' if published else 'latest'
318- self .get_json (update_version , refresh = True )
343+ self .get_metadata ('latest' , refresh = True )
0 commit comments