Skip to content

Commit 09cb398

Browse files
committed
Add metadata updating and improve metadata handling
- Dataset `_json` and `get_json` renamed to `_metadata` and `get_metadata`
- Improve logic behind updating metadata for various versions
- Raise appropriate error for publishing datasets in unpublished dataverses
- Remove `delete_all_files` method (This should not be a one-line operation)
1 parent 34bbbfe commit 09cb398

File tree

2 files changed

+63
-38
lines changed

2 files changed

+63
-38
lines changed

dataverse/connection.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ def auth(self):
2121
return self.token, None
2222

2323
def get_service_document(self, refresh=False):
24-
if not refresh and self._service_document:
24+
if not refresh and self._service_document is not None:
2525
return self._service_document
2626

2727
resp = requests.get(self.sd_uri, auth=self.auth)

dataverse/dataset.py

Lines changed: 62 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,14 @@
11
import os
2+
import json
23
import StringIO
34
from zipfile import ZipFile
45

56
from lxml import etree
67
import requests
78

89
from exceptions import (
9-
NoContainerError, OperationFailedError,
10-
ConnectionError, MetadataNotFoundError, VersionJsonNotFoundError
10+
NoContainerError, OperationFailedError, UnpublishedDataverseError,
11+
ConnectionError, MetadataNotFoundError, VersionJsonNotFoundError,
1112
)
1213
from file import DataverseFile
1314
from settings import SWORD_BOOTSTRAP
@@ -30,7 +31,7 @@ def __init__(self, entry=SWORD_BOOTSTRAP, dataverse=None, edit_uri=None,
3031

3132
self._entry = etree.XML(entry) if isinstance(entry, str) else entry
3233
self._statement = None
33-
self._json = {}
34+
self._metadata = {}
3435
self._id = None
3536

3637
# Updates sword entry from keyword arguments
@@ -169,41 +170,81 @@ def get_state(self, refresh=False):
169170
attribute_value='latestVersionState'
170171
).text
171172

172-
def get_json(self, version='latest', refresh=False):
173-
if not refresh and self._json.get(version):
174-
return self._json.get(version)
173+
def get_metadata(self, version='latest', refresh=False):
174+
if not refresh and self._metadata.get(version):
175+
return self._metadata[version]
175176

176177
if not self.dataverse:
177178
raise NoContainerError('This dataset has not been added to a Dataverse.')
178179

179-
json_url = 'https://{0}/api/datasets/{1}/versions/:{2}'.format(
180+
url = 'https://{0}/api/datasets/{1}/versions/:{2}'.format(
180181
self.connection.host,
181182
self.id,
182183
version,
183184
)
184185

185-
resp = requests.get(json_url, params={'key': self.connection.token})
186+
resp = requests.get(url, params={'key': self.connection.token})
186187

187188
if resp.status_code == 404:
188189
raise VersionJsonNotFoundError('JSON metadata could not be found for this version.')
189190
elif resp.status_code != 200:
190191
raise ConnectionError('JSON metadata could not be retrieved.')
191192

192-
self._json[version] = resp.json()['data']
193-
return self._json[version]
193+
metadata = resp.json()['data']
194+
self._metadata[version] = metadata
195+
196+
# Update corresponding version metadata if retrieving 'latest'
197+
if version == 'latest':
198+
latest_version = 'latest-published' if metadata['versionState'] == 'RELEASED' else 'draft'
199+
self._metadata[latest_version] = metadata
200+
201+
return metadata
202+
203+
def update_metadata(self, metadata):
204+
"""Updates dataset draft with provided metadata.
205+
Will create a draft version if none exists.
206+
207+
:param dict metadata: json retrieved from `get_metadata`
208+
"""
209+
url = 'https://{0}/api/datasets/{1}/versions/:draft'.format(
210+
self.connection.host,
211+
self.id,
212+
)
213+
resp = requests.put(
214+
url,
215+
headers={'Content-type': 'application/json'},
216+
data=json.dumps(metadata),
217+
params={'key': self.connection.token},
218+
)
219+
220+
if resp.status_code != 200:
221+
raise OperationFailedError('JSON metadata could not be updated.')
222+
223+
updated_metadata = resp.json()['data']
224+
self._metadata['draft'] = updated_metadata
225+
self._metadata['latest'] = updated_metadata
226+
227+
def create_draft(self):
228+
"""Create draft version of dataset without changing metadata"""
229+
metadata = self.get_metadata(refresh=True)
230+
if metadata.get('versionState') == 'RELEASED':
231+
self.update_metadata(metadata)
194232

195233
def publish(self):
234+
if not self.dataverse.is_published:
235+
raise UnpublishedDataverseError('Host Dataverse must be published.')
236+
196237
resp = requests.post(
197238
self.edit_uri,
198239
headers={'In-Progress': 'false', 'Content-Length': 0},
199240
auth=self.connection.auth,
200241
)
201242

202243
if resp.status_code != 200:
203-
raise OperationFailedError('The Dataverse could not be published.')
244+
raise OperationFailedError('The Dataset could not be published.')
204245

205-
receipt = resp.content
206-
self._refresh(receipt=receipt, published=True)
246+
self._metadata.pop('draft', None)
247+
self._refresh(receipt=resp.content)
207248

208249
def get_file(self, file_name, version='latest', refresh=False):
209250
files = self.get_files(version, refresh)
@@ -215,7 +256,7 @@ def get_file_by_id(self, file_id, version='latest', refresh=False):
215256

216257
def get_files(self, version='latest', refresh=False):
217258
try:
218-
files_json = self.get_json(version, refresh)['files']
259+
files_json = self.get_metadata(version, refresh)['files']
219260
return [DataverseFile.from_json(self, file_json)
220261
for file_json in files_json]
221262
except VersionJsonNotFoundError:
@@ -262,7 +303,7 @@ def upload_file(self, filename, content, zip=True):
262303
auth=self.connection.auth,
263304
)
264305

265-
self.get_json(refresh=True)
306+
self.get_metadata(refresh=True)
266307

267308
def delete_file(self, dataverse_file):
268309
resp = requests.delete(
@@ -273,25 +314,10 @@ def delete_file(self, dataverse_file):
273314
if resp.status_code != 204:
274315
raise OperationFailedError('The file could not be deleted.')
275316

276-
self.get_json(refresh=True)
277-
278-
def delete_all_files(self):
279-
for f in self.get_files():
280-
self.delete_file(f)
281-
282-
# TODO: DANGEROUS! Will delete all unspecified fields! Deposit receipts only give SOME of the fields
283-
# Can potentially be replaced with native API functionality
284-
# def update_metadata(self):
285-
# depositReceipt = self.hostDataverse.connection.sword.update(
286-
# dr=self.lastDepositReceipt,
287-
# edit_iri=self.editUri,
288-
# edit_media_iri=self.editMediaUri,
289-
# metadata_entry=self.entry,
290-
# )
291-
# self._refresh(deposit_receipt=depositReceipt)
292-
293-
# if we perform a server operation, we should refresh the dataset object
294-
def _refresh(self, receipt=None, published=False):
317+
self.get_metadata(refresh=True)
318+
319+
# If we perform a server operation, we should refresh the dataset object
320+
def _refresh(self, receipt=None):
295321
if receipt:
296322
self.edit_uri = get_element(
297323
receipt,
@@ -311,8 +337,7 @@ def _refresh(self, receipt=None, published=False):
311337
attribute='rel',
312338
attribute_value='http://purl.org/net/sword/terms/statement'
313339
).get('href')
340+
314341
self.get_statement(refresh=True)
315342
self.get_entry(refresh=True)
316-
317-
update_version = 'latest-published' if published else 'latest'
318-
self.get_json(update_version, refresh=True)
343+
self.get_metadata('latest', refresh=True)

0 commit comments

Comments
 (0)