
Commit 50e4a16

committed
moving methods to api.v1.datasets
1 parent 2852221 commit 50e4a16

File tree

4 files changed: +382 −58 lines changed


pyclowder/api/v1/datasets.py

Lines changed: 371 additions & 0 deletions
@@ -0,0 +1,371 @@
import json
import logging
import os
import tempfile

import requests

from pyclowder.client import ClowderClient
from pyclowder.collections import get_datasets, get_child_collections, delete as delete_collection
from pyclowder.utils import StatusMessage

def create_empty(connector, host, key, datasetname, description, parentid=None, spaceid=None, token=None):
    """Create a new dataset in Clowder.

    Keyword arguments:
    connector -- connector information, used to get missing parameters and send status updates
    host -- the clowder host, including http and port, should end with a /
    key -- the secret key to login to clowder
    datasetname -- name of new dataset to create
    description -- description of new dataset
    parentid -- id of parent collection
    spaceid -- id of the space to add dataset to
    """
    logger = logging.getLogger(__name__)

    url = '%sapi/datasets/createempty?key=%s' % (host, key)

    # attach the parent collection and space only when they are provided
    body = {"name": datasetname, "description": description}
    if parentid:
        body["collection"] = [parentid]
    if spaceid:
        body["space"] = [spaceid]

    result = requests.post(url, headers={"Content-Type": "application/json"},
                           data=json.dumps(body),
                           verify=connector.ssl_verify if connector else True)
    result.raise_for_status()

    datasetid = result.json()['id']
    logger.debug("dataset id = [%s]", datasetid)

    return datasetid

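For orientation, a minimal sketch of calling create_empty directly; the host and key values are hypothetical, and passing connector=None falls back to default SSL verification:

host = "http://localhost:9000/"  # hypothetical host
key = "r1ek3rs"                  # hypothetical key
ds_id = create_empty(None, host, key, "test dataset", "created from a script")
print("created dataset %s" % ds_id)
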
def delete(connector, host, key, datasetid, token=None):
    """Delete dataset from Clowder.

    Keyword arguments:
    connector -- connector information, used to get missing parameters and send status updates
    host -- the clowder host, including http and port, should end with a /
    key -- the secret key to login to clowder
    datasetid -- the dataset to delete
    """
    headers = {"Authorization": "Bearer " + key}

    url = "%sapi/v2/datasets/%s" % (host, datasetid)

    result = requests.delete(url, headers=headers,
                             verify=connector.ssl_verify if connector else True)
    result.raise_for_status()

    return json.loads(result.text)

# TODO collection not implemented yet in v2
def delete_by_collection(connector, host, key, collectionid, recursive=True, delete_colls=False):
    """Delete datasets from Clowder by iterating through collection.

    Keyword arguments:
    connector -- connector information, used to get missing parameters and send status updates
    host -- the clowder host, including http and port, should end with a /
    key -- the secret key to login to clowder
    collectionid -- the collection to walk
    recursive -- whether to also iterate across child collections
    delete_colls -- whether to also delete collections containing the datasets
    """
    dslist = get_datasets(connector, host, key, collectionid)
    for ds in dslist:
        delete(connector, host, key, ds['id'])

    if recursive:
        childcolls = get_child_collections(connector, host, key, collectionid)
        for coll in childcolls:
            delete_by_collection(connector, host, key, coll['id'], recursive, delete_colls)

    if delete_colls:
        delete_collection(connector, host, key, collectionid)

def download(connector, host, key, datasetid, token=None):
    """Download dataset to be processed from Clowder as zip file.

    Keyword arguments:
    connector -- connector information, used to get missing parameters and send status updates
    host -- the clowder host, including http and port, should end with a /
    key -- the secret key to login to clowder
    datasetid -- the dataset to download
    """
    connector.message_process({"type": "dataset", "id": datasetid}, "Downloading dataset.")

    # fetch dataset zipfile
    url = '%sapi/datasets/%s/download?key=%s' % (host, datasetid, key)
    result = requests.get(url, stream=True,
                          verify=connector.ssl_verify if connector else True)
    result.raise_for_status()

    (filedescriptor, zipfile) = tempfile.mkstemp(suffix=".zip")
    with os.fdopen(filedescriptor, "wb") as outfile:
        for chunk in result.iter_content(chunk_size=10 * 1024):
            outfile.write(chunk)

    return zipfile

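The returned path points at a temporary file the caller must clean up. A minimal sketch of unpacking it with the standard library, assuming connector, host, key, and dataset_id are already defined; the destination path is hypothetical:

import zipfile

zippath = download(connector, host, key, dataset_id)
with zipfile.ZipFile(zippath) as archive:
    archive.extractall("/tmp/dataset_contents")  # hypothetical destination
os.remove(zippath)  # remove the temporary zip when done
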
def download_metadata(connector, host, key, datasetid, extractor=None, token=None):
    """Download dataset JSON-LD metadata from Clowder.

    Keyword arguments:
    connector -- connector information, used to get missing parameters and send status updates
    host -- the clowder host, including http and port, should end with a /
    key -- the secret key to login to clowder
    datasetid -- the dataset to fetch metadata of
    extractor -- extractor name to filter results (if only one extractor's metadata is desired)
    """
    headers = {"Authorization": "Bearer " + key}

    # TODO: apply the v1-style extractor filter once the v2 endpoint supports it
    filterstring = "" if extractor is None else "&extractor=%s" % extractor
    url = '%sapi/v2/datasets/%s/metadata' % (host, datasetid)

    # fetch data
    result = requests.get(url, stream=True, headers=headers,
                          verify=connector.ssl_verify if connector else True)
    result.raise_for_status()

    return result.json()

def get_info(connector, host, key, datasetid, token=None):
    """Get basic dataset information from UUID.

    Keyword arguments:
    connector -- connector information, used to get missing parameters and send status updates
    host -- the clowder host, including http and port, should end with a /
    key -- the secret key to login to clowder
    datasetid -- the dataset to get info of
    """
    headers = {"Authorization": "Bearer " + key}

    url = "%sapi/v2/datasets/%s" % (host, datasetid)

    result = requests.get(url, headers=headers,
                          verify=connector.ssl_verify if connector else True)
    result.raise_for_status()

    return json.loads(result.text)

def get_file_list(connector, host, key, datasetid, token=None):
    """Get list of files in a dataset as JSON object.

    Keyword arguments:
    connector -- connector information, used to get missing parameters and send status updates
    host -- the clowder host, including http and port, should end with a /
    key -- the secret key to login to clowder
    datasetid -- the dataset to get filelist of
    """
    headers = {"Authorization": "Bearer " + key}

    url = "%sapi/v2/datasets/%s/files" % (host, datasetid)

    result = requests.get(url, headers=headers, verify=connector.ssl_verify if connector else True)
    result.raise_for_status()

    return json.loads(result.text)

def remove_metadata(connector, host, key, datasetid, extractor=None):
    """Delete dataset JSON-LD metadata from Clowder.

    Keyword arguments:
    connector -- connector information, used to get missing parameters and send status updates
    host -- the clowder host, including http and port, should end with a /
    key -- the secret key to login to clowder
    datasetid -- the dataset to delete metadata from
    extractor -- extractor name to filter deletion
        !!! ALL JSON-LD METADATA WILL BE REMOVED IF NO extractor PROVIDED !!!
    """
    headers = {"Authorization": "Bearer " + key}

    # TODO: apply the v1-style extractor filter once the v2 endpoint supports it
    filterstring = "" if extractor is None else "&extractor=%s" % extractor
    url = '%sapi/v2/datasets/%s/metadata' % (host, datasetid)

    result = requests.delete(url, stream=True, headers=headers,
                             verify=connector.ssl_verify if connector else True)
    result.raise_for_status()

def submit_extraction(connector, host, key, datasetid, extractorname, token=None):
    """Submit dataset for extraction by given extractor.

    Keyword arguments:
    connector -- connector information, used to get missing parameters and send status updates
    host -- the clowder host, including http and port, should end with a /
    key -- the secret key to login to clowder
    datasetid -- the dataset UUID to submit
    extractorname -- registered name of extractor to trigger
    """
    headers = {'Content-Type': 'application/json',
               "Authorization": "Bearer " + key}

    url = "%sapi/v2/datasets/%s/extractions" % (host, datasetid)

    result = requests.post(url,
                           headers=headers,
                           data=json.dumps({"extractor": extractorname}),
                           verify=connector.ssl_verify if connector else True)
    result.raise_for_status()

    return result.status_code

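A minimal sketch of triggering an extractor on one dataset; the extractor name, key, and IDs are hypothetical, and connector=None falls back to default SSL verification:

status = submit_extraction(None, "http://localhost:9000/", "r1ek3rs",
                           "your-dataset-uuid", "ncsa.example.extractor")
print("submission returned HTTP %s" % status)
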
def submit_extractions_by_collection(connector, host, key, collectionid, extractorname, recursive=True):
    """Manually trigger an extraction on all datasets in a collection.

    This will iterate through all datasets in the given collection and submit them to
    the provided extractor.

    Keyword arguments:
    connector -- connector information, used to get missing parameters and send status updates
    host -- the clowder host, including http and port, should end with a /
    key -- the secret key to login to clowder
    collectionid -- the collection UUID to walk
    extractorname -- registered name of extractor to trigger
    recursive -- whether to also submit child collection datasets recursively (defaults to True)
    """
    dslist = get_datasets(connector, host, key, collectionid)

    for ds in dslist:
        submit_extraction(connector, host, key, ds['id'], extractorname)

    if recursive:
        childcolls = get_child_collections(connector, host, key, collectionid)
        for coll in childcolls:
            submit_extractions_by_collection(connector, host, key, coll['id'], extractorname, recursive)

# TODO tags not implemented in v2
def upload_tags(connector, host, key, datasetid, tags):
    """Upload dataset tags to Clowder.

    Keyword arguments:
    connector -- connector information, used to get missing parameters and send status updates
    host -- the clowder host, including http and port, should end with a /
    key -- the secret key to login to clowder
    datasetid -- the dataset that is currently being processed
    tags -- the tags to be uploaded
    """
    connector.status_update(StatusMessage.processing, {"type": "dataset", "id": datasetid},
                            "Uploading dataset tags.")

    headers = {'Content-Type': 'application/json'}
    url = '%sapi/datasets/%s/tags?key=%s' % (host, datasetid, key)
    result = requests.post(url, headers=headers, data=json.dumps(tags),
                           verify=connector.ssl_verify if connector else True)
    result.raise_for_status()

def upload_metadata(connector, host, key, datasetid, metadata, token=None):
    """Upload dataset JSON-LD metadata to Clowder.

    Keyword arguments:
    connector -- connector information, used to get missing parameters and send status updates
    host -- the clowder host, including http and port, should end with a /
    key -- the secret key to login to clowder
    datasetid -- the dataset that is currently being processed
    metadata -- the metadata to be uploaded
    """
    headers = {'Content-Type': 'application/json',
               "Authorization": "Bearer " + key}
    connector.message_process({"type": "dataset", "id": datasetid}, "Uploading dataset metadata.")

    url = '%sapi/v2/datasets/%s/metadata' % (host, datasetid)
    result = requests.post(url, headers=headers, data=json.dumps(metadata),
                           verify=connector.ssl_verify if connector else True)
    result.raise_for_status()

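For reference, a sketch of the kind of JSON-LD payload upload_metadata might carry; the context URL, agent fields, and content keys are illustrative, not a fixed schema:

metadata = {
    "@context": ["https://clowder.ncsa.illinois.edu/contexts/metadata.jsonld"],
    "agent": {"@type": "cat:extractor", "extractor_id": "ncsa.example.extractor"},  # hypothetical extractor
    "content": {"words": 1024}  # illustrative extractor output
}
upload_metadata(connector, host, key, dataset_id, metadata)  # assumes a live connector
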
# TODO not done yet, need more testing
class DatasetsApi(object):
    """
    API to manage the REST CRUD endpoints for datasets.
    """

    def __init__(self, client=None, host=None, key=None,
                 username=None, password=None):
        """Set client if provided otherwise create new one"""
        if client:
            self.client = client
        else:
            self.client = ClowderClient(host=host, key=key,
                                        username=username, password=password)

    def datasets_get(self):
        """
        Get the list of all available datasets.

        :return: Full list of datasets.
        :rtype: `requests.Response`
        """
        logging.debug("Getting all datasets")
        try:
            return self.client.get("/datasets")
        except Exception as e:
            logging.error("Error retrieving dataset list: %s", str(e))

    def dataset_get(self, dataset_id):
        """
        Get a specific dataset by id.

        :return: Dataset object as JSON.
        :rtype: `requests.Response`
        """
        logging.debug("Getting dataset %s" % dataset_id)
        try:
            return self.client.get("/datasets/%s" % dataset_id)
        except Exception as e:
            logging.error("Error retrieving dataset %s: %s" % (dataset_id, str(e)))

    def create_empty(self, dataset_id):
        """
        Create a dataset.

        :return: If successful or not.
        :rtype: `requests.Response`
        """
        logging.debug("Adding dataset")
        try:
            return self.client.post("/datasets/createempty", dataset_id)
        except Exception as e:
            logging.error("Error adding dataset %s: %s" % (dataset_id, str(e)))

    def dataset_delete(self, dataset_id):
        """
        Delete a specific dataset by id.

        :return: If successful or not.
        :rtype: `requests.Response`
        """
        logging.debug("Deleting dataset %s" % dataset_id)
        try:
            return self.client.delete("/datasets/%s" % dataset_id)
        except Exception as e:
            logging.error("Error deleting dataset %s: %s" % (dataset_id, str(e)))

    def upload_file(self, dataset_id, file):
        """
        Add a file to a dataset.

        :return: If successful or not.
        :rtype: `requests.Response`
        """
        logging.debug("Uploading a file to dataset %s" % dataset_id)
        try:
            return self.client.post_file("/uploadToDataset/%s" % dataset_id, file)
        except Exception as e:
            logging.error("Error uploading to dataset %s: %s" % (dataset_id, str(e)))

    def add_metadata(self, dataset_id, metadata):
        """
        Add metadata to a dataset.

        :return: If successful or not.
        :rtype: `requests.Response`
        """
        logging.debug("Update metadata of dataset %s" % dataset_id)
        try:
            return self.client.post("/datasets/%s/metadata" % dataset_id, metadata)
        except Exception as e:
            logging.error("Error uploading to dataset %s: %s" % (dataset_id, str(e)))

pyclowder/api/v1/files.py

Whitespace-only changes.

pyclowder/api/v1/metadata.py

Whitespace-only changes.
