
Commit cfb5f4a

file methods in api.v2 and api.v1

1 parent 50e4a16 · commit cfb5f4a

3 files changed (+384, -83 lines)


pyclowder/api/v1/files.py

Lines changed: 365 additions & 0 deletions
@@ -0,0 +1,365 @@
"""Clowder API

This module provides simple wrappers around the clowder Files API
"""

import json
import logging
import os
import tempfile

import requests
from requests_toolbelt.multipart.encoder import MultipartEncoder
from urllib3.filepost import encode_multipart_formdata

from pyclowder.datasets import get_file_list
from pyclowder.collections import get_datasets, get_child_collections

# Some sources of urllib3 support warning suppression, but not all
try:
    from urllib3 import disable_warnings
    from requests.packages.urllib3.exceptions import InsecureRequestWarning
    requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
except Exception:
    pass


# pylint: disable=too-many-arguments
def download(connector, host, key, fileid, intermediatefileid=None, ext=""):
    """Download file to be processed from Clowder.

    Keyword arguments:
    connector -- connector information, used to get missing parameters and send status updates
    host -- the clowder host, including http and port, should end with a /
    key -- the secret key to login to clowder
    fileid -- the file that is currently being processed
    intermediatefileid -- either same as fileid, or the intermediate file to be used
    ext -- the file extension, the downloaded file will end with this extension
    """

    connector.message_process({"type": "file", "id": fileid}, "Downloading file.")

    # TODO: intermediateid doesn't really seem to be used here, can we remove entirely?
    if not intermediatefileid:
        intermediatefileid = fileid

    url = '%sapi/files/%s?key=%s' % (host, intermediatefileid, key)
    result = connector.get(url, stream=True, verify=connector.ssl_verify if connector else True)

    (inputfile, inputfilename) = tempfile.mkstemp(suffix=ext)

    try:
        with os.fdopen(inputfile, "wb") as outputfile:
            for chunk in result.iter_content(chunk_size=10*1024):
                outputfile.write(chunk)
        return inputfilename
    except Exception:
        # Clean up the tempfile on failure; the caller owns it on success
        os.remove(inputfilename)
        raise


def download_info(connector, host, key, fileid):
    """Download file summary metadata from Clowder.

    Keyword arguments:
    connector -- connector information, used to get missing parameters and send status updates
    host -- the clowder host, including http and port, should end with a /
    key -- the secret key to login to clowder
    fileid -- the file to fetch metadata of
    """

    url = '%sapi/files/%s/metadata?key=%s' % (host, fileid, key)

    # fetch data
    result = connector.get(url, stream=True, verify=connector.ssl_verify if connector else True)

    return result


def download_metadata(connector, host, key, fileid, extractor=None):
    """Download file JSON-LD metadata from Clowder.

    Keyword arguments:
    connector -- connector information, used to get missing parameters and send status updates
    host -- the clowder host, including http and port, should end with a /
    key -- the secret key to login to clowder
    fileid -- the file to fetch metadata of
    extractor -- extractor name to filter results (if only one extractor's metadata is desired)
    """

    filterstring = "" if extractor is None else "&extractor=%s" % extractor
    url = '%sapi/files/%s/metadata.jsonld?key=%s%s' % (host, fileid, key, filterstring)

    # fetch data
    result = connector.get(url, stream=True, verify=connector.ssl_verify if connector else True)

    return result


def submit_extraction(connector, host, key, fileid, extractorname):
    """Submit file for extraction by given extractor.

    Keyword arguments:
    connector -- connector information, used to get missing parameters and send status updates
    host -- the clowder host, including http and port, should end with a /
    key -- the secret key to login to clowder
    fileid -- the file UUID to submit
    extractorname -- registered name of extractor to trigger
    """

    url = "%sapi/files/%s/extractions?key=%s" % (host, fileid, key)

    result = connector.post(url,
                            headers={'Content-Type': 'application/json'},
                            data=json.dumps({"extractor": extractorname}),
                            verify=connector.ssl_verify if connector else True)

    return result


def submit_extractions_by_dataset(connector, host, key, datasetid, extractorname, ext=False):
    """Manually trigger an extraction on all files in a dataset.

    This will iterate through all files in the given dataset and submit them to
    the provided extractor.

    Keyword arguments:
    connector -- connector information, used to get missing parameters and send status updates
    host -- the clowder host, including http and port, should end with a /
    key -- the secret key to login to clowder
    datasetid -- the dataset UUID to submit
    extractorname -- registered name of extractor to trigger
    ext -- extension to filter. e.g. 'tif' will only submit TIFF files for extraction.
    """

    filelist = get_file_list(connector, host, key, datasetid)

    for f in filelist:
        # Only submit files that end with given extension, if specified
        if ext and not f['filename'].endswith(ext):
            continue

        submit_extraction(connector, host, key, f['id'], extractorname)


def submit_extractions_by_collection(connector, host, key, collectionid, extractorname, ext=False, recursive=True):
    """Manually trigger an extraction on all files in a collection.

    This will iterate through all datasets in the given collection and pass each one to
    submit_extractions_by_dataset(). If recursive is True (the default), files in nested
    child collections are submitted as well.

    Keyword arguments:
    connector -- connector information, used to get missing parameters and send status updates
    host -- the clowder host, including http and port, should end with a /
    key -- the secret key to login to clowder
    collectionid -- the collection UUID to submit
    extractorname -- registered name of extractor to trigger
    ext -- extension to filter. e.g. 'tif' will only submit TIFF files for extraction
    recursive -- whether to also submit child collection files recursively (defaults to True)
    """

    dslist = get_datasets(connector, host, key, collectionid)

    for ds in dslist:
        submit_extractions_by_dataset(connector, host, key, ds['id'], extractorname, ext)

    if recursive:
        childcolls = get_child_collections(connector, host, key, collectionid)
        for coll in childcolls:
            submit_extractions_by_collection(connector, host, key, coll['id'], extractorname, ext, recursive)


def upload_metadata(connector, host, key, fileid, metadata):
    """Upload file JSON-LD metadata to Clowder.

    Keyword arguments:
    connector -- connector information, used to get missing parameters and send status updates
    host -- the clowder host, including http and port, should end with a /
    key -- the secret key to login to clowder
    fileid -- the file that is currently being processed
    metadata -- the metadata to be uploaded
    """

    connector.message_process({"type": "file", "id": fileid}, "Uploading file metadata.")

    headers = {'Content-Type': 'application/json'}
    url = '%sapi/files/%s/metadata.jsonld?key=%s' % (host, fileid, key)
    result = connector.post(url, headers=headers, data=json.dumps(metadata),
                            verify=connector.ssl_verify if connector else True)


# pylint: disable=too-many-arguments
def upload_preview(connector, host, key, fileid, previewfile, previewmetadata=None, preview_mimetype=None):
    """Upload preview to Clowder.

    Keyword arguments:
    connector -- connector information, used to get missing parameters and send status updates
    host -- the clowder host, including http and port, should end with a /
    key -- the secret key to login to clowder
    fileid -- the file that is currently being processed
    previewfile -- the file containing the preview
    previewmetadata -- any metadata to be associated with preview, can contain a section_id
                       to indicate the section this preview should be associated with.
    preview_mimetype -- (optional) MIME type of the preview file. By default, this is obtained from the
                        file itself and this parameter can be ignored. E.g. 'application/vnd.clowder+custom+xml'
    """

    connector.message_process({"type": "file", "id": fileid}, "Uploading file preview.")

    logger = logging.getLogger(__name__)
    headers = {'Content-Type': 'application/json'}

    # upload preview
    url = '%sapi/previews?key=%s' % (host, key)
    with open(previewfile, 'rb') as filebytes:
        # If a custom preview file MIME type is provided, use it to generate the preview file object.
        if preview_mimetype is not None:
            result = connector.post(url, files={"File": (os.path.basename(previewfile), filebytes, preview_mimetype)},
                                    verify=connector.ssl_verify if connector else True)
        else:
            result = connector.post(url, files={"File": filebytes}, verify=connector.ssl_verify if connector else True)

    previewid = result.json()['id']
    logger.debug("preview id = [%s]", previewid)

    # associate uploaded preview with original file
    if fileid and not (previewmetadata and 'section_id' in previewmetadata and previewmetadata['section_id']):
        url = '%sapi/files/%s/previews/%s?key=%s' % (host, fileid, previewid, key)
        result = connector.post(url, headers=headers, data=json.dumps({}),
                                verify=connector.ssl_verify if connector else True)

    # associate metadata with preview
    if previewmetadata is not None:
        url = '%sapi/previews/%s/metadata?key=%s' % (host, previewid, key)
        result = connector.post(url, headers=headers, data=json.dumps(previewmetadata),
                                verify=connector.ssl_verify if connector else True)

    return previewid


def upload_tags(connector, host, key, fileid, tags):
    """Upload file tags to Clowder.

    Keyword arguments:
    connector -- connector information, used to get missing parameters and send status updates
    host -- the clowder host, including http and port, should end with a /
    key -- the secret key to login to clowder
    fileid -- the file that is currently being processed
    tags -- the tags to be uploaded
    """

    connector.message_process({"type": "file", "id": fileid}, "Uploading file tags.")

    headers = {'Content-Type': 'application/json'}
    url = '%sapi/files/%s/tags?key=%s' % (host, fileid, key)
    result = connector.post(url, headers=headers, data=json.dumps(tags),
                            verify=connector.ssl_verify if connector else True)


def upload_thumbnail(connector, host, key, fileid, thumbnail):
    """Upload thumbnail to Clowder.

    Keyword arguments:
    connector -- connector information, used to get missing parameters and send status updates
    host -- the clowder host, including http and port, should end with a /
    key -- the secret key to login to clowder
    fileid -- the file that the thumbnail should be associated with
    thumbnail -- the file containing the thumbnail
    """

    logger = logging.getLogger(__name__)
    url = host + 'api/fileThumbnail?key=' + key

    # upload thumbnail
    with open(thumbnail, 'rb') as inputfile:
        result = connector.post(url, files={"File": inputfile}, verify=connector.ssl_verify if connector else True)
    thumbnailid = result.json()['id']
    logger.debug("thumbnail id = [%s]", thumbnailid)

    # associate uploaded thumbnail with original file
    if fileid:
        headers = {'Content-Type': 'application/json'}
        url = host + 'api/files/' + fileid + '/thumbnails/' + thumbnailid + '?key=' + key
        connector.post(url, headers=headers, data=json.dumps({}), verify=connector.ssl_verify if connector else True)

    return thumbnailid


def upload_to_dataset(connector, host, key, datasetid, filepath, check_duplicate=False):
    """Upload file to existing Clowder dataset.

    Keyword arguments:
    connector -- connector information, used to get missing parameters and send status updates
    host -- the clowder host, including http and port, should end with a /
    key -- the secret key to login to clowder
    datasetid -- the dataset that the file should be associated with
    filepath -- path to file
    check_duplicate -- check if filename already exists in dataset and skip upload if so
    """

    logger = logging.getLogger(__name__)

    if check_duplicate:
        ds_files = get_file_list(connector, host, key, datasetid)
        for f in ds_files:
            if f['filename'] == os.path.basename(filepath):
                logger.debug("found %s in dataset %s; not re-uploading", f['filename'], datasetid)
                return None

    # If the file lives under a path mounted into Clowder, upload a pointer instead of the bytes
    for source_path in connector.mounted_paths:
        if filepath.startswith(connector.mounted_paths[source_path]):
            return _upload_to_dataset_local(connector, host, key, datasetid, filepath)

    url = '%sapi/uploadToDataset/%s?key=%s' % (host, datasetid, key)

    if os.path.exists(filepath):
        filename = os.path.basename(filepath)
        m = MultipartEncoder(
            fields={'file': (filename, open(filepath, 'rb'))}
        )
        result = connector.post(url, data=m, headers={'Content-Type': m.content_type},
                                verify=connector.ssl_verify if connector else True)

        uploadedfileid = result.json()['id']
        logger.debug("uploaded file id = [%s]", uploadedfileid)

        return uploadedfileid
    else:
        logger.error("unable to upload file %s (not found)", filepath)


def _upload_to_dataset_local(connector, host, key, datasetid, filepath):
    """Upload file POINTER to existing Clowder dataset. Does not copy actual file bytes.

    Keyword arguments:
    connector -- connector information, used to get missing parameters and send status updates
    host -- the clowder host, including http and port, should end with a /
    key -- the secret key to login to clowder
    datasetid -- the dataset that the file should be associated with
    filepath -- path to file
    """

    logger = logging.getLogger(__name__)
    url = '%sapi/uploadToDataset/%s?key=%s' % (host, datasetid, key)

    if os.path.exists(filepath):
        # Replace local path with remote path before uploading
        for source_path in connector.mounted_paths:
            if filepath.startswith(connector.mounted_paths[source_path]):
                filepath = filepath.replace(connector.mounted_paths[source_path],
                                            source_path)
                break

        filename = os.path.basename(filepath)
        m = MultipartEncoder(
            fields={'file': (filename, open(filepath, 'rb'))}
        )
        result = connector.post(url, data=m, headers={'Content-Type': m.content_type},
                                verify=connector.ssl_verify if connector else True)

        uploadedfileid = result.json()['id']
        logger.debug("uploaded file id = [%s]", uploadedfileid)

        return uploadedfileid
    else:
        logger.error("unable to upload local file %s (not found)", filepath)
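
A minimal usage sketch for these v1 wrappers, assuming an already-initialized pyclowder connector and hypothetical host, key, file ID, and extractor-name values (none of which appear in the commit):

    import os
    from pyclowder.api.v1 import files

    # Hypothetical inputs; a real extractor receives these via its message payload.
    host = "https://clowder.example.com/"  # must end with a /
    key = "SECRET_KEY"
    fileid = "5d9f9c0e4f0c4e001b000001"

    # 'connector' is assumed to be a configured pyclowder connector instance.
    inputfile = files.download(connector, host, key, fileid, ext=".tif")
    try:
        files.upload_metadata(connector, host, key, fileid, {"content": {"pixels": 1024}})
        files.submit_extraction(connector, host, key, fileid, "ncsa.image.preview")
    finally:
        os.remove(inputfile)  # download() returns a tempfile path the caller must clean up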

pyclowder/api/v2/files.py

Lines changed: 2 additions & 2 deletions
@@ -101,7 +101,7 @@ def download_metadata(connector, host, key, fileid, extractor=None, token=None):
     # fetch data
     result = connector.get(url, stream=True, verify=connector.ssl_verify if connector else True, headers=headers)
 
-    return result.json()
+    return result
 
 
 def submit_extraction(connector, host, key, fileid, extractorname, token=None):
@@ -122,7 +122,7 @@ def submit_extraction(connector, host, key, fileid, extractorname, token=None):
                             data=json.dumps({"extractor": extractorname}),
                             verify=connector.ssl_verify if connector else True)
 
-    return result.json()
+    return result
 
 
 # TODO not implemented in v2
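
With these two hunks, the v2 wrappers return the raw response object instead of its decoded JSON body, matching the v1 functions above. A minimal caller-side sketch of the adjustment, with connector, host, key, and fileid as hypothetical stand-ins:

    result = download_metadata(connector, host, key, fileid)
    metadata = result.json()  # decoding is now the caller's responsibility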
