Skip to content

Commit 8b9a5b5

Browse files
committed
Merge pull request #9 from IQSS/feature/dataset-versions
Switch to the native API to list files from any version
2 parents 8f10d96 + f6b259f commit 8b9a5b5

File tree

3 files changed

+30
-80
lines changed

3 files changed

+30
-80
lines changed

dataverse/dataset.py

Lines changed: 20 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,11 @@
77

88
from exceptions import (
99
MethodNotAllowedError, NoContainerError, OperationFailedError,
10-
ConnectionError, MetadataNotFoundError, UnpublishedDatasetError
10+
ConnectionError, MetadataNotFoundError, VersionJsonNotFoundError
1111
)
1212
from file import DataverseFile
1313
from settings import SWORD_BOOTSTRAP
14-
from utils import get_element, get_elements, get_files_in_path, add_field
14+
from utils import get_element, get_files_in_path, add_field
1515

1616

1717
class Dataset(object):
@@ -30,7 +30,7 @@ def __init__(self, entry=SWORD_BOOTSTRAP, dataverse=None, edit_uri=None,
3030
self._entry = etree.XML(entry) if isinstance(entry, str) else entry
3131
self._statement = None
3232
self._state = None
33-
self._json = None
33+
self._json = {}
3434
self._id = None
3535

3636
# Updates sword entry from keyword arguments
@@ -100,24 +100,6 @@ def id(self):
100100

101101
raise MetadataNotFoundError('The dataset ID could not be found.')
102102

103-
def get_contents(self, refresh=False):
104-
if not refresh and self._contents_json:
105-
return self._contents_json
106-
107-
content_uri = 'https://{0}/api/dataverses/{1}/contents'.format(
108-
self.connection.host, self.alias
109-
)
110-
resp = requests.get(
111-
content_uri,
112-
params={'key': self.connection.token}
113-
)
114-
115-
if resp.status_code != 200:
116-
raise ConnectionError('Atom entry could not be retrieved.')
117-
118-
self._contents_json = resp.json()
119-
return self._contents_json
120-
121103
@property
122104
def citation(self):
123105
return get_element(
@@ -188,51 +170,43 @@ def get_state(self, refresh=False):
188170
).text
189171
return self._state
190172

191-
def get_json(self, refresh=False):
192-
if not refresh and self._json:
193-
return self._json
173+
def get_json(self, version="latest", refresh=False):
174+
if not refresh and self._json.get(version):
175+
return self._json.get(version)
194176

195177
if not self.dataverse:
196178
raise NoContainerError('This dataset has not been added to a Dataverse.')
197179

198-
# TODO: Allow specification of other versions
199-
json_url = 'https://{0}/api/datasets/{1}/versions/:latest-published'.format(
180+
json_url = 'https://{0}/api/datasets/{1}/versions/:{2}'.format(
200181
self.connection.host,
201-
self.id
182+
self.id,
183+
version,
202184
)
203185

204186
resp = requests.get(json_url, params={'key': self.connection.token})
205187

206188
if resp.status_code == 404:
207-
raise UnpublishedDatasetError('JSON metadata cannot be retried for an unpublished dataset.')
189+
raise VersionJsonNotFoundError('JSON metadata could not be found for this version.')
208190
elif resp.status_code != 200:
209191
raise ConnectionError('JSON metadata could not be retrieved.')
210192

211-
self._json = resp.json()['data']
212-
return self._json
193+
self._json[version] = resp.json()['data']
194+
return self._json[version]
213195

214-
def get_file(self, file_name, published=False, refresh=True):
215-
files = self.get_files(published, refresh)
196+
def get_file(self, file_name, version="latest", refresh=True):
197+
files = self.get_files(version, refresh)
216198
return next((f for f in files if f.name == file_name), None)
217199

218-
def get_file_by_id(self, file_id, published=False, refresh=True):
219-
files = self.get_files(published, refresh)
200+
def get_file_by_id(self, file_id, version="latest", refresh=True):
201+
files = self.get_files(version, refresh)
220202
return next((f for f in files if f.id == file_id), None)
221203

222-
def get_files(self, published=False, refresh=True):
223-
if published:
224-
return self.get_published_files(refresh)
225-
226-
# TODO: Should the native API be preferred?
227-
elements = get_elements(self.get_statement(refresh), 'entry')
228-
return [DataverseFile.from_statement(self, element)
229-
for element in elements]
230-
231-
def get_published_files(self, refresh=True):
204+
def get_files(self, version="latest", refresh=True):
232205
try:
206+
files_json = self.get_json(version, refresh)['files']
233207
return [DataverseFile.from_json(self, file_json)
234-
for file_json in self.get_json(refresh)['files']]
235-
except UnpublishedDatasetError:
208+
for file_json in files_json]
209+
except VersionJsonNotFoundError:
236210
return []
237211

238212
def add_file(self, filepath):
@@ -292,11 +266,6 @@ def publish(self):
292266
self._refresh(receipt=receipt)
293267

294268
def delete_file(self, dataverse_file):
295-
if dataverse_file.is_published:
296-
raise MethodNotAllowedError(
297-
'Published versions of files cannot be deleted.'
298-
)
299-
300269
resp = requests.delete(
301270
dataverse_file.edit_media_uri,
302271
auth=self.connection.auth,

dataverse/exceptions.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,6 @@ class UnpublishedDataverseError(DataverseError):
4343
pass
4444

4545

46-
class UnpublishedDatasetError(DataverseError):
47-
"""Raised when a request requires that a dataset first be published"""
46+
class VersionJsonNotFoundError(DataverseError):
47+
"""Raised when requested json data for a version is not found"""
4848
pass

dataverse/file.py

Lines changed: 8 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,37 +1,18 @@
1-
import urlparse
2-
3-
from exceptions import InsufficientMetadataError
4-
from utils import get_element, sanitize
1+
from utils import sanitize
52

63

74
class DataverseFile(object):
85
def __init__(self, dataset, name, file_id=None, edit_media_uri=None):
96
self.dataset = dataset
107
self.name = sanitize(name)
8+
self.id = file_id
119

12-
if edit_media_uri:
13-
self.is_published = False
14-
self.edit_media_uri = edit_media_uri
15-
self.id = edit_media_uri.split('/')[-2]
16-
self.download_url = 'http://{0}/api/access/datafile/{1}'.format(
17-
dataset.connection.host, self.id
18-
)
19-
elif file_id:
20-
self.is_published = True
21-
self.id = file_id
22-
self.download_url = 'http://{0}/api/access/datafile/{1}'.format(
23-
dataset.connection.host, self.id
24-
)
25-
else:
26-
raise InsufficientMetadataError(
27-
'Files must have a file id or edit media uri.'
28-
)
29-
30-
@classmethod
31-
def from_statement(cls, dataset, element):
32-
edit_media_uri = get_element(element, 'content').get('src')
33-
name = edit_media_uri.rsplit("/", 1)[-1]
34-
return cls(dataset, name, edit_media_uri=edit_media_uri)
10+
self.download_url = 'http://{0}/api/access/datafile/{1}'.format(
11+
dataset.connection.host, self.id
12+
)
13+
self.edit_media_uri = 'https://{0}/dvn/api/data-deposit/v1.1/swordv2/edit-media/file/{1}'.format(
14+
dataset.connection.host, self.id
15+
)
3516

3617
@classmethod
3718
def from_json(cls, dataset, json):

0 commit comments

Comments
 (0)