Skip to content

Commit f98a5dc

Browse files
committed
adding new class for api files, metadata
more to be added later for datasets once that work is completed in Clowder v2
1 parent ff13256 commit f98a5dc

File tree

2 files changed

+394
-0
lines changed

2 files changed

+394
-0
lines changed

pyclowder/api/v2/files.py

Lines changed: 394 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,394 @@
1+
"""Clowder API
2+
3+
This module provides simple wrappers around the clowder Files API
4+
"""
5+
6+
import json
7+
import logging
8+
import os
9+
import tempfile
10+
11+
import requests
12+
from requests_toolbelt.multipart.encoder import MultipartEncoder
13+
from urllib3.filepost import encode_multipart_formdata
14+
15+
from pyclowder.datasets import get_file_list
16+
from pyclowder.collections import get_datasets, get_child_collections
17+
18+
from dotenv import load_dotenv
load_dotenv()
# NOTE(review): the original called float(os.getenv('clowder_version')) and
# raised TypeError at import time when the env var was unset; default to "2.0"
# since this module targets the v2 API — confirm the intended default.
clowder_version = float(os.getenv('clowder_version', '2.0'))

# Some sources of urllib3 support warning suppression, but not all
try:
    from urllib3 import disable_warnings
    from requests.packages.urllib3.exceptions import InsecureRequestWarning
    requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
except (ImportError, AttributeError):
    # Suppression is best-effort only; older urllib3/requests builds lack it.
    # (Narrowed from a bare `except:` which also swallowed KeyboardInterrupt.)
    pass
29+
30+
31+
# pylint: disable=too-many-arguments
32+
def download(connector, host, key, fileid, intermediatefileid=None, ext="", token=None):
    """Download file to be processed from Clowder.

    Keyword arguments:
    connector -- connector information, used to get missing parameters and send status updates
    host -- the clowder host, including http and port, should end with a /
    key -- the secret key to login to clowder
    fileid -- the file that is currently being processed
    intermediatefileid -- either same as fileid, or the intermediate file to be used
    ext -- the file extension, the downloaded file will end with this extension
    token -- bearer token for the v2 API; falls back to key when not supplied
    """

    connector.message_process({"type": "file", "id": fileid}, "Downloading file.")

    # TODO: intermediateid doesn't really seem to be used here, can we remove entirely?
    if not intermediatefileid:
        intermediatefileid = fileid

    url = '%sapi/v2/files/%s' % (host, intermediatefileid)
    # BUG FIX: the original built "Bearer " + token and crashed with TypeError
    # when token was left at its default None; fall back to key, matching
    # upload_metadata/upload_to_dataset in this module.
    headers = {"Authorization": "Bearer " + (token if token else key)}
    result = connector.get(url, stream=True, verify=connector.ssl_verify if connector else True, headers=headers)

    # Stream the response body into a temp file whose name is returned to the caller.
    (inputfile, inputfilename) = tempfile.mkstemp(suffix=ext)

    try:
        with os.fdopen(inputfile, "wb") as outputfile:
            for chunk in result.iter_content(chunk_size=10 * 1024):
                outputfile.write(chunk)
        return inputfilename
    except Exception:
        # Remove the partially-written temp file before propagating the error.
        os.remove(inputfilename)
        raise
66+
67+
68+
def download_info(connector, host, key, fileid, token=None):
    """Download file summary metadata from Clowder.

    Keyword arguments:
    connector -- connector information, used to get missing parameters and send status updates
    host -- the clowder host, including http and port, should end with a /
    key -- the secret key to login to clowder
    fileid -- the file to fetch metadata of
    token -- bearer token for the v2 API; falls back to key when not supplied
    """

    # NOTE(review): this hits the same /metadata endpoint as download_metadata;
    # confirm a dedicated v2 file-summary endpoint is not intended here.
    url = '%sapi/v2/files/%s/metadata' % (host, fileid)
    # BUG FIX: "Bearer " + token crashed with TypeError on the default
    # token=None; fall back to key like the other helpers in this module.
    headers = {"Authorization": "Bearer " + (token if token else key)}

    # fetch data
    result = connector.get(url, stream=True, verify=connector.ssl_verify if connector else True, headers=headers)

    return result.json()
84+
85+
86+
def download_metadata(connector, host, key, fileid, extractor=None, token=None):
    """Download file JSON-LD metadata from Clowder.

    Keyword arguments:
    connector -- connector information, used to get missing parameters and send status updates
    host -- the clowder host, including http and port, should end with a /
    key -- the secret key to login to clowder
    fileid -- the file to fetch metadata of
    extractor -- extractor name to filter results (if only one extractor's metadata is desired)
    token -- bearer token for the v2 API; falls back to key when not supplied
    """

    filterstring = "" if extractor is None else "?extractor=%s" % extractor
    # BUG FIX: the original used '.../metadata?%s', yielding a malformed URL
    # "metadata??extractor=..." — filterstring already carries its own '?'.
    url = '%sapi/v2/files/%s/metadata%s' % (host, fileid, filterstring)
    # BUG FIX: fall back to key when token is None instead of raising TypeError.
    headers = {"Authorization": "Bearer " + (token if token else key)}

    # fetch data
    result = connector.get(url, stream=True, verify=connector.ssl_verify if connector else True, headers=headers)

    return result.json()
105+
106+
107+
def submit_extraction(connector, host, key, fileid, extractorname, token=None):
    """Submit file for extraction by given extractor.

    Keyword arguments:
    connector -- connector information, used to get missing parameters and send status updates
    host -- the clowder host, including http and port, should end with a /
    key -- the secret key to login to clowder
    fileid -- the file UUID to submit
    extractorname -- registered name of extractor to trigger
    token -- bearer token for the v2 API; falls back to key when not supplied
    """

    url = "%sapi/v2/files/%s/extractions?key=%s" % (host, fileid, key)
    # BUG FIX: "Bearer " + token crashed with TypeError on the default
    # token=None; fall back to key like the other helpers in this module.
    result = connector.post(url,
                            headers={'Content-Type': 'application/json',
                                     "Authorization": "Bearer " + (token if token else key)},
                            data=json.dumps({"extractor": extractorname}),
                            verify=connector.ssl_verify if connector else True)

    return result.json()
126+
127+
128+
# TODO not implemented in v2
129+
def submit_extractions_by_dataset(connector, host, key, datasetid, extractorname, ext=False):
    """Trigger an extraction for every (optionally filtered) file in a dataset.

    Iterates the dataset's file list and submits each matching file to the
    given extractor via submit_extraction().

    Keyword arguments:
    connector -- connector information, used to get missing parameters and send status updates
    host -- the clowder host, including http and port, should end with a /
    key -- the secret key to login to clowder
    datasetid -- the dataset UUID to submit
    extractorname -- registered name of extractor to trigger
    ext -- extension to filter. e.g. 'tif' will only submit TIFF files for extraction.
    """

    for entry in get_file_list(connector, host, key, datasetid):
        # Skip files that do not match the requested extension, when one is given.
        if not ext or entry['filename'].endswith(ext):
            submit_extraction(connector, host, key, entry['id'], extractorname)
152+
153+
154+
# TODO not implemented in v2
155+
def submit_extractions_by_collection(connector, host, key, collectionid, extractorname, ext=False, recursive=True):
    """Trigger extractions for every file in every dataset of a collection.

    Each dataset in the collection is handed to submit_extractions_by_dataset();
    with recursive=True, child collections are processed the same way.

    Keyword arguments:
    connector -- connector information, used to get missing parameters and send status updates
    host -- the clowder host, including http and port, should end with a /
    key -- the secret key to login to clowder
    collectionid -- the collection UUID to submit
    extractorname -- registered name of extractor to trigger
    ext -- extension to filter. e.g. 'tif' will only submit TIFF files for extraction
    recursive -- whether to also submit child collection files recursively (defaults to True)
    """

    for dataset in get_datasets(connector, host, key, collectionid):
        submit_extractions_by_dataset(connector, host, key, dataset['id'], extractorname, ext)

    if recursive:
        # Recurse into each child collection with the same filter settings.
        for child in get_child_collections(connector, host, key, collectionid):
            submit_extractions_by_collection(connector, host, key, child['id'], extractorname, ext, recursive)
180+
181+
182+
def upload_metadata(connector, host, key, fileid, metadata, token=None):
    """Upload file JSON-LD metadata to Clowder.

    Keyword arguments:
    connector -- connector information, used to get missing parameters and send status updates
    host -- the clowder host, including http and port, should end with a /
    key -- the secret key to login to clowder
    fileid -- the file that is currently being processed
    metadata -- the metadata to be uploaded
    token -- bearer token for the v2 API; falls back to key when not supplied
    """

    connector.message_process({"type": "file", "id": fileid}, "Uploading file metadata.")

    headers = {'Content-Type': 'application/json',
               'Authorization': 'Bearer ' + (token if token else key)}

    # BUG FIX: replaced a stray debug print(metadata) with module logging.
    logging.getLogger(__name__).debug("uploading metadata to file %s: %s", fileid, metadata)

    url = '%sapi/v2/files/%s/metadata' % (host, fileid)
    connector.post(url, headers=headers, data=json.dumps(metadata),
                   verify=connector.ssl_verify if connector else True)
204+
205+
206+
# TODO not implemented in v2
207+
# pylint: disable=too-many-arguments
208+
def upload_preview(connector, host, key, fileid, previewfile, previewmetadata=None, preview_mimetype=None):
    """Upload preview to Clowder.

    Keyword arguments:
    connector -- connector information, used to get missing parameters and send status updates
    host -- the clowder host, including http and port, should end with a /
    key -- the secret key to login to clowder
    fileid -- the file that is currently being processed
    previewfile -- the file containing the preview
    previewmetadata -- any metadata to be associated with preview, can contain a section_id
                       to indicate the section this preview should be associated with.
    preview_mimetype -- (optional) MIME type of the preview file. By default, this is obtained from the
                        file itself and this parameter can be ignored. E.g. 'application/vnd.clowder+custom+xml'
    """

    connector.message_process({"type": "file", "id": fileid}, "Uploading file preview.")

    log = logging.getLogger(__name__)
    json_headers = {'Content-Type': 'application/json'}
    verify = connector.ssl_verify if connector else True

    # Post the preview bytes first; a custom MIME type, when provided, is
    # attached via the multipart file tuple.
    upload_url = '%sapi/previews?key=%s' % (host, key)
    with open(previewfile, 'rb') as stream:
        if preview_mimetype is None:
            response = connector.post(upload_url, files={"File": stream}, verify=verify)
        else:
            file_tuple = (os.path.basename(previewfile), stream, preview_mimetype)
            response = connector.post(upload_url, files={"File": file_tuple}, verify=verify)

    previewid = response.json()['id']
    log.debug("preview id = [%s]", previewid)

    # Associate the preview with the original file, unless the metadata routes
    # it to a specific section via section_id.
    has_section = bool(previewmetadata and 'section_id' in previewmetadata and previewmetadata['section_id'])
    if fileid and not has_section:
        assoc_url = '%sapi/files/%s/previews/%s?key=%s' % (host, fileid, previewid, key)
        connector.post(assoc_url, headers=json_headers, data=json.dumps({}), verify=verify)

    # Attach any metadata supplied for the preview itself.
    if previewmetadata is not None:
        meta_url = '%sapi/previews/%s/metadata?key=%s' % (host, previewid, key)
        connector.post(meta_url, headers=json_headers, data=json.dumps(previewmetadata), verify=verify)

    return previewid
254+
255+
# TODO not implemented in v2
256+
def upload_tags(connector, host, key, fileid, tags):
    """Upload file tag to Clowder.

    Keyword arguments:
    connector -- connector information, used to get missing parameters and send status updates
    host -- the clowder host, including http and port, should end with a /
    key -- the secret key to login to clowder
    fileid -- the file that is currently being processed
    tags -- the tags to be uploaded
    """

    connector.message_process({"type": "file", "id": fileid}, "Uploading file tags.")

    # Post the tags payload as JSON to the file's tags endpoint.
    tag_url = '%sapi/files/%s/tags?key=%s' % (host, fileid, key)
    connector.post(tag_url,
                   headers={'Content-Type': 'application/json'},
                   data=json.dumps(tags),
                   verify=connector.ssl_verify if connector else True)
273+
274+
# TODO not implemented in v2
275+
def upload_thumbnail(connector, host, key, fileid, thumbnail):
    """Upload thumbnail to Clowder.

    Keyword arguments:
    connector -- connector information, used to get missing parameters and send status updates
    host -- the clowder host, including http and port, should end with a /
    key -- the secret key to login to clowder
    fileid -- the file that the thumbnail should be associated with
    thumbnail -- the file containing the thumbnail
    """

    log = logging.getLogger(__name__)
    verify = connector.ssl_verify if connector else True

    # Upload the thumbnail bytes.
    with open(thumbnail, 'rb') as handle:
        response = connector.post(host + 'api/fileThumbnail?key=' + key,
                                  files={"File": handle}, verify=verify)
    thumbnailid = response.json()['id']
    log.debug("thumbnail id = [%s]", thumbnailid)

    # Associate the uploaded thumbnail with the original file, when one is given.
    if fileid:
        assoc_url = host + 'api/files/' + fileid + '/thumbnails/' + thumbnailid + '?key=' + key
        connector.post(assoc_url, headers={'Content-Type': 'application/json'},
                       data=json.dumps({}), verify=verify)

    return thumbnailid
302+
303+
304+
def upload_to_dataset(connector, host, key, datasetid, filepath, check_duplicate=False, token=None):
    """Upload file to existing Clowder dataset.

    Keyword arguments:
    connector -- connector information, used to get missing parameters and send status updates
    host -- the clowder host, including http and port, should end with a /
    key -- the secret key to login to clowder
    datasetid -- the dataset that the file should be associated with
    filepath -- path to file
    check_duplicate -- check if filename already exists in dataset and skip upload if so
    token -- bearer token for the v2 API; falls back to key when not supplied
    """

    logger = logging.getLogger(__name__)

    # TODO fix this to use v2 api
    if check_duplicate:
        ds_files = get_file_list(connector, host, key, datasetid)
        for f in ds_files:
            if f['filename'] == os.path.basename(filepath):
                logger.debug("found %s in dataset %s; not re-uploading" % (f['filename'], datasetid))
                return None

    # If the file lives on a path mounted into Clowder, upload the pointer
    # instead of copying the bytes.
    for source_path in connector.mounted_paths:
        if filepath.startswith(connector.mounted_paths[source_path]):
            return _upload_to_dataset_local(connector, host, key, datasetid, filepath, token)

    url = '%sapi/v2/datasets/%s/files' % (host, datasetid)

    if os.path.exists(filepath):
        filename = os.path.basename(filepath)
        # BUG FIX: the original left the open() handle unreferenced inside the
        # encoder, leaking the file descriptor; close it once the post is done.
        with open(filepath, 'rb') as filebytes:
            m = MultipartEncoder(
                fields={'file': (filename, filebytes)}
            )
            headers = {"Authorization": "Bearer " + (token if token else key),
                       'Content-Type': m.content_type}
            result = connector.post(url, data=m, headers=headers,
                                    verify=connector.ssl_verify if connector else True)

        uploadedfileid = result.json()['id']
        logger.debug("uploaded file id = [%s]", uploadedfileid)

        return uploadedfileid
    else:
        logger.error("unable to upload file %s (not found)", filepath)
352+
353+
354+
def _upload_to_dataset_local(connector, host, key, datasetid, filepath, token=None):
    """Upload file POINTER to existing Clowder dataset. Does not copy actual file bytes.

    Keyword arguments:
    connector -- connector information, used to get missing parameters and send status updates
    host -- the clowder host, including http and port, should end with a /
    key -- the secret key to login to clowder
    datasetid -- the dataset that the file should be associated with
    filepath -- path to file
    token -- bearer token for the v2 API; falls back to key when not supplied
    """

    logger = logging.getLogger(__name__)
    # BUG FIX: the endpoint was misspelled 'datatsets' in the original.
    url = '%sapi/v2/datasets/%s/files' % (host, datasetid)

    if os.path.exists(filepath):
        # Replace local path with remote path before uploading
        # NOTE(review): the rewritten (remote) path is then opened below, which
        # only works if it also resolves on this machine — confirm intent.
        for source_path in connector.mounted_paths:
            if filepath.startswith(connector.mounted_paths[source_path]):
                filepath = filepath.replace(connector.mounted_paths[source_path],
                                            source_path)
                break

        filename = os.path.basename(filepath)
        # BUG FIX: close the open() handle after the post; the original leaked it.
        with open(filepath, 'rb') as stream:
            m = MultipartEncoder(
                fields={'file': (filename, stream)}
            )
            headers = {"Authorization": "Bearer " + (token if token else key),
                       'Content-Type': m.content_type}
            result = connector.post(url, data=m, headers=headers,
                                    verify=connector.ssl_verify if connector else True)

        uploadedfileid = result.json()['id']
        logger.debug("uploaded file id = [%s]", uploadedfileid)

        return uploadedfileid
    else:
        logger.error("unable to upload local file %s (not found)", filepath)

pyclowder/api/v2/metadata.py

Whitespace-only changes.

0 commit comments

Comments
 (0)