"""Clowder API

This module provides simple wrappers around the Clowder Datasets API.
"""

import json
import logging
import os
import tempfile

import requests

from pyclowder.client import ClowderClient
from pyclowder.collections import get_datasets, get_child_collections, delete as delete_collection
from pyclowder.utils import StatusMessage

def create_empty(connector, host, key, datasetname, description, parentid=None, spaceid=None, token=None):
    """Create a new dataset in Clowder.

    Keyword arguments:
    connector -- connector information, used to get missing parameters and send status updates
    host -- the clowder host, including http and port, should end with a /
    key -- the secret key to login to clowder
    datasetname -- name of new dataset to create
    description -- description of new dataset
    parentid -- id of parent collection
    spaceid -- id of the space to add dataset to
    """
    logger = logging.getLogger(__name__)

    url = '%sapi/datasets/createempty?key=%s' % (host, key)

    # build the request body, including collection/space only when provided
    body = {"name": datasetname, "description": description}
    if parentid:
        body["collection"] = [parentid]
    if spaceid:
        body["space"] = [spaceid]

    result = requests.post(url, headers={"Content-Type": "application/json"},
                           data=json.dumps(body),
                           verify=connector.ssl_verify if connector else True)
    result.raise_for_status()

    datasetid = result.json()['id']
    logger.debug("dataset id = [%s]", datasetid)

    return datasetid

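# A minimal usage sketch for create_empty(); the host URL, key, and names below
# are placeholders, and passing connector=None falls back to verify=True above.
def _example_create_empty():
    dataset_id = create_empty(None, "https://clowder.example.org/", "SECRET_KEY",
                              "my dataset", "created via pyclowder")
    print("created dataset %s" % dataset_id)
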
def delete(connector, host, key, datasetid, token=None):
    """Delete dataset from Clowder.

    Keyword arguments:
    connector -- connector information, used to get missing parameters and send status updates
    host -- the clowder host, including http and port, should end with a /
    key -- the secret key to login to clowder
    datasetid -- the dataset to delete
    """
    headers = {"Authorization": "Bearer " + key}

    url = "%sapi/v2/datasets/%s" % (host, datasetid)

    result = requests.delete(url, headers=headers,
                             verify=connector.ssl_verify if connector else True)
    result.raise_for_status()

    return json.loads(result.text)

# TODO collection not implemented yet in v2
def delete_by_collection(connector, host, key, collectionid, recursive=True, delete_colls=False):
    """Delete datasets from Clowder by iterating through collection.

    Keyword arguments:
    connector -- connector information, used to get missing parameters and send status updates
    host -- the clowder host, including http and port, should end with a /
    key -- the secret key to login to clowder
    collectionid -- the collection to walk
    recursive -- whether to also iterate across child collections
    delete_colls -- whether to also delete collections containing the datasets
    """
    dslist = get_datasets(connector, host, key, collectionid)
    for ds in dslist:
        delete(connector, host, key, ds['id'])

    if recursive:
        childcolls = get_child_collections(connector, host, key, collectionid)
        for coll in childcolls:
            delete_by_collection(connector, host, key, coll['id'], recursive, delete_colls)

    if delete_colls:
        delete_collection(connector, host, key, collectionid)

def download(connector, host, key, datasetid, token=None):
    """Download dataset to be processed from Clowder as zip file.

    Keyword arguments:
    connector -- connector information, used to get missing parameters and send status updates
    host -- the clowder host, including http and port, should end with a /
    key -- the secret key to login to clowder
    datasetid -- the dataset that is currently being processed
    """
    connector.message_process({"type": "dataset", "id": datasetid}, "Downloading dataset.")

    # fetch dataset zipfile
    url = '%sapi/datasets/%s/download?key=%s' % (host, datasetid, key)
    result = requests.get(url, stream=True,
                          verify=connector.ssl_verify if connector else True)
    result.raise_for_status()

    (filedescriptor, zipfile) = tempfile.mkstemp(suffix=".zip")
    with os.fdopen(filedescriptor, "wb") as outfile:
        for chunk in result.iter_content(chunk_size=10 * 1024):
            outfile.write(chunk)

    return zipfile

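# Hypothetical example: download a dataset archive and list its contents. The
# dataset id is a placeholder and the connector must implement message_process().
def _example_download(connector):
    import zipfile as zf
    path = download(connector, "https://clowder.example.org/", "SECRET_KEY", "DATASET_ID")
    with zf.ZipFile(path) as archive:
        print(archive.namelist())
    os.remove(path)
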
def download_metadata(connector, host, key, datasetid, extractor=None, token=None):
    """Download dataset JSON-LD metadata from Clowder.

    Keyword arguments:
    connector -- connector information, used to get missing parameters and send status updates
    host -- the clowder host, including http and port, should end with a /
    key -- the secret key to login to clowder
    datasetid -- the dataset to fetch metadata of
    extractor -- extractor name to filter results (if only one extractor's metadata is desired)
    """
    headers = {"Authorization": "Bearer " + key}

    filterstring = "" if extractor is None else "?extractor=%s" % extractor
    url = '%sapi/v2/datasets/%s/metadata%s' % (host, datasetid, filterstring)

    # fetch data
    result = requests.get(url, stream=True, headers=headers,
                          verify=connector.ssl_verify if connector else True)
    result.raise_for_status()

    return result.json()

def get_info(connector, host, key, datasetid, token=None):
    """Get basic dataset information from UUID.

    Keyword arguments:
    connector -- connector information, used to get missing parameters and send status updates
    host -- the clowder host, including http and port, should end with a /
    key -- the secret key to login to clowder
    datasetid -- the dataset to get info of
    """
    headers = {"Authorization": "Bearer " + key}

    url = "%sapi/v2/datasets/%s" % (host, datasetid)

    result = requests.get(url, headers=headers,
                          verify=connector.ssl_verify if connector else True)
    result.raise_for_status()

    return json.loads(result.text)

def get_file_list(connector, host, key, datasetid, token=None):
    """Get list of files in a dataset as JSON object.

    Keyword arguments:
    connector -- connector information, used to get missing parameters and send status updates
    host -- the clowder host, including http and port, should end with a /
    key -- the secret key to login to clowder
    datasetid -- the dataset to get filelist of
    """
    headers = {"Authorization": "Bearer " + key}

    url = "%sapi/v2/datasets/%s/files" % (host, datasetid)

    result = requests.get(url, headers=headers,
                          verify=connector.ssl_verify if connector else True)
    result.raise_for_status()

    return json.loads(result.text)

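# Sketch: print the files in a dataset. The response is assumed to be a list of
# dicts with "name" and "id" keys; host, key, and the dataset id are placeholders.
def _example_list_files():
    files = get_file_list(None, "https://clowder.example.org/", "SECRET_KEY", "DATASET_ID")
    for entry in files:
        print(entry.get("name"), entry.get("id"))
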
def remove_metadata(connector, host, key, datasetid, extractor=None):
    """Delete dataset JSON-LD metadata from Clowder.

    Keyword arguments:
    connector -- connector information, used to get missing parameters and send status updates
    host -- the clowder host, including http and port, should end with a /
    key -- the secret key to login to clowder
    datasetid -- the dataset to delete metadata of
    extractor -- extractor name to filter deletion
        !!! ALL JSON-LD METADATA WILL BE REMOVED IF NO extractor PROVIDED !!!
    """
    headers = {"Authorization": "Bearer " + key}

    filterstring = "" if extractor is None else "?extractor=%s" % extractor
    url = '%sapi/v2/datasets/%s/metadata%s' % (host, datasetid, filterstring)

    # delete the metadata
    result = requests.delete(url, stream=True, headers=headers,
                             verify=connector.ssl_verify if connector else True)
    result.raise_for_status()

def submit_extraction(connector, host, key, datasetid, extractorname, token=None):
    """Submit dataset for extraction by given extractor.

    Keyword arguments:
    connector -- connector information, used to get missing parameters and send status updates
    host -- the clowder host, including http and port, should end with a /
    key -- the secret key to login to clowder
    datasetid -- the dataset UUID to submit
    extractorname -- registered name of extractor to trigger
    """
    headers = {'Content-Type': 'application/json',
               "Authorization": "Bearer " + key}

    url = "%sapi/v2/datasets/%s/extractions" % (host, datasetid)

    result = requests.post(url,
                           headers=headers,
                           data=json.dumps({"extractor": extractorname}),
                           verify=connector.ssl_verify if connector else True)
    result.raise_for_status()

    return result.status_code

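# Sketch of triggering an extractor on a dataset. Host, key, and the dataset id
# are placeholders; any registered extractor name would work in place of the
# wordcount example.
def _example_submit_extraction():
    status = submit_extraction(None, "https://clowder.example.org/", "SECRET_KEY",
                               "DATASET_ID", "ncsa.wordcount")
    print("submission returned HTTP %s" % status)
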
def submit_extractions_by_collection(connector, host, key, collectionid, extractorname, recursive=True):
    """Manually trigger an extraction on all datasets in a collection.

    This will iterate through all datasets in the given collection and submit them to
    the provided extractor.

    Keyword arguments:
    connector -- connector information, used to get missing parameters and send status updates
    host -- the clowder host, including http and port, should end with a /
    key -- the secret key to login to clowder
    collectionid -- the collection UUID to walk
    extractorname -- registered name of extractor to trigger
    recursive -- whether to also submit child collection datasets recursively (defaults to True)
    """
    dslist = get_datasets(connector, host, key, collectionid)

    for ds in dslist:
        submit_extraction(connector, host, key, ds['id'], extractorname)

    if recursive:
        childcolls = get_child_collections(connector, host, key, collectionid)
        for coll in childcolls:
            submit_extractions_by_collection(connector, host, key, coll['id'], extractorname, recursive)

# TODO tags not implemented in v2
def upload_tags(connector, host, key, datasetid, tags):
    """Upload dataset tags to Clowder.

    Keyword arguments:
    connector -- connector information, used to get missing parameters and send status updates
    host -- the clowder host, including http and port, should end with a /
    key -- the secret key to login to clowder
    datasetid -- the dataset that is currently being processed
    tags -- the tags to be uploaded
    """
    connector.status_update(StatusMessage.processing, {"type": "dataset", "id": datasetid},
                            "Uploading dataset tags.")

    headers = {'Content-Type': 'application/json'}
    url = '%sapi/datasets/%s/tags?key=%s' % (host, datasetid, key)
    connector.post(url, headers=headers, data=json.dumps(tags),
                   verify=connector.ssl_verify if connector else True)

def upload_metadata(connector, host, key, datasetid, metadata, token=None):
    """Upload dataset JSON-LD metadata to Clowder.

    Keyword arguments:
    connector -- connector information, used to get missing parameters and send status updates
    host -- the clowder host, including http and port, should end with a /
    key -- the secret key to login to clowder
    datasetid -- the dataset that is currently being processed
    metadata -- the metadata to be uploaded
    """
    headers = {'Content-Type': 'application/json',
               "Authorization": "Bearer " + key}
    connector.message_process({"type": "dataset", "id": datasetid}, "Uploading dataset metadata.")

    url = '%sapi/v2/datasets/%s/metadata' % (host, datasetid)
    result = requests.post(url, headers=headers, data=json.dumps(metadata),
                           verify=connector.ssl_verify if connector else True)
    result.raise_for_status()

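# Hypothetical metadata payload for upload_metadata(). The exact envelope the v2
# endpoint expects is an assumption here; this mirrors the v1 JSON-LD shape.
def _example_upload_metadata(connector):
    metadata = {
        "@context": ["https://clowder.ncsa.illinois.edu/contexts/metadata.jsonld"],
        "content": {"lines": 42},  # assumed extractor-specific content
        "agent": {"@type": "extractor", "name": "ncsa.wordcount"}
    }
    upload_metadata(connector, "https://clowder.example.org/", "SECRET_KEY",
                    "DATASET_ID", metadata)
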
# TODO not done yet, need more testing
class DatasetsApi(object):
    """
    API to manage the REST CRUD endpoints for datasets.
    """

    def __init__(self, client=None, host=None, key=None,
                 username=None, password=None):
        """Set client if provided, otherwise create a new one."""
        if client:
            self.client = client
        else:
            self.client = ClowderClient(host=host, key=key,
                                        username=username, password=password)

    def datasets_get(self):
        """
        Get the list of all available datasets.

        :return: Full list of datasets.
        :rtype: `requests.Response`
        """
        logging.debug("Getting all datasets")
        try:
            return self.client.get("/datasets")
        except Exception as e:
            logging.error("Error retrieving dataset list: %s", str(e))

    def dataset_get(self, dataset_id):
        """
        Get a specific dataset by id.

        :return: Dataset object as JSON.
        :rtype: `requests.Response`
        """
        logging.debug("Getting dataset %s", dataset_id)
        try:
            return self.client.get("/datasets/%s" % dataset_id)
        except Exception as e:
            logging.error("Error retrieving dataset %s: %s", dataset_id, str(e))

    def create_empty(self, dataset_id):
        """
        Create an empty dataset.

        :return: If successful or not.
        :rtype: `requests.Response`
        """
        logging.debug("Adding dataset")
        try:
            return self.client.post("/datasets/createempty", dataset_id)
        except Exception as e:
            logging.error("Error adding dataset %s: %s", dataset_id, str(e))

    def dataset_delete(self, dataset_id):
        """
        Delete a specific dataset by id.

        :return: If successful or not.
        :rtype: `requests.Response`
        """
        logging.debug("Deleting dataset %s", dataset_id)
        try:
            return self.client.delete("/datasets/%s" % dataset_id)
        except Exception as e:
            logging.error("Error deleting dataset %s: %s", dataset_id, str(e))

    def upload_file(self, dataset_id, file):
        """
        Add a file to a dataset.

        :return: If successful or not.
        :rtype: `requests.Response`
        """
        logging.debug("Uploading a file to dataset %s", dataset_id)
        try:
            return self.client.post_file("/uploadToDataset/%s" % dataset_id, file)
        except Exception as e:
            logging.error("Error uploading to dataset %s: %s", dataset_id, str(e))

    def add_metadata(self, dataset_id, metadata):
        """
        Add metadata to a dataset.

        :return: If successful or not.
        :rtype: `requests.Response`
        """
        logging.debug("Updating metadata of dataset %s", dataset_id)
        try:
            return self.client.post("/datasets/%s/metadata" % dataset_id, metadata)
        except Exception as e:
            logging.error("Error uploading metadata to dataset %s: %s", dataset_id, str(e))
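
# Hypothetical usage of DatasetsApi; the host and key are placeholders. Each
# method returns the underlying response (or None if an error was logged).
def _example_datasets_api():
    api = DatasetsApi(host="https://clowder.example.org", key="SECRET_KEY")
    response = api.datasets_get()
    if response is not None:
        print(response.json())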