Skip to content

Commit 78b1cea

Browse files
Upload Subject Attached Media (#331)
* add_attached_images * WIP update save attached_image with option to async save WIP: for some reason when utilizing threading, logged in client does not persist among threads. added a client parameter to post to /attached_media route and private _save_attached_image so logged in client can bubble down. Ideally, this would be saved in threading.local() * add metadata as param to ancillary data upload * remove pdb trace * return future object to bubble up thread exceptions * update mimetype detection using newer logic from master * add media type validation to upload subject attached images * fix detect mime type typo * update upload ancillary data to return Future. Caller will need to call .result() to see if successful/errors * hound indentation sniffs * add doc strings to newly added methods * Update panoptes_client/subject.py Co-authored-by: Cliff Johnson <cliff@zooniverse.org> * Update panoptes_client/subject.py Co-authored-by: Cliff Johnson <cliff@zooniverse.org> * Update panoptes_client/subject.py Co-authored-by: Cliff Johnson <cliff@zooniverse.org> * Update panoptes_client/subject.py Co-authored-by: Cliff Johnson <cliff@zooniverse.org> * check subject existence on new attached_images methods, make add_attached_image a private method since used internally, add e.g. on saved attached image for externally hosted files * add comments on examples in doc strings * update content type on private method to keyword arg (updating calls to changed method to match order) --------- Co-authored-by: Cliff Johnson <cliff@zooniverse.org>
1 parent 813351b commit 78b1cea

File tree

1 file changed

+195
-16
lines changed

1 file changed

+195
-16
lines changed

panoptes_client/subject.py

Lines changed: 195 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535

3636
from panoptes_client.panoptes import (
3737
LinkResolver,
38+
ObjectNotSavedException,
3839
Panoptes,
3940
PanoptesAPIException,
4041
PanoptesObject,
@@ -75,7 +76,9 @@ class Subject(PanoptesObject):
7576
@classmethod
7677
def async_saves(cls):
7778
"""
78-
Returns a context manager to allow asynchronously creating subjects.
79+
Returns a context manager to allow asynchronously creating subjects
80+
or creating and uploading subject attached images/media.
81+
7982
Using this context manager will create a pool of threads which will
8083
create multiple subjects at once and upload any local files
8184
simultaneously.
@@ -90,6 +93,12 @@ def async_saves(cls):
9093
s.add_location(filename)
9194
s.save()
9295
96+
with Subject.async_saves():
97+
local_files = [...]
98+
for filename in local_files:
99+
s = Subject(1234)
100+
s.save_attached_image(filename)
101+
93102
Alternatively, you can manually shut down the thread pool::
94103
95104
pool = Subject.async_saves()
@@ -205,6 +214,27 @@ def _upload_media(self, url, media_data, media_type):
205214
upload_response.raise_for_status()
206215
return upload_response
207216

217+
def _detect_media_type(self, media_data=None, manual_mimetype=None, location=None):
    """
    Work out the MIME type of a piece of media.

    - **media_data** the content of the media, as read from the file.
    - **manual_mimetype** optional string. When given it is returned
      unchanged and no detection takes place.
    - **location** optional path or URL of the media. Only consulted when
      libmagic is not in use, because :py:mod:`mimetypes` guesses from a
      file name, path or URL rather than from file content.

    Returns the MIME type as a string.

    Raises :py:class:`UnknownMediaException` if the type cannot be
    determined.
    """
    if manual_mimetype is not None:
        return manual_mimetype

    if MEDIA_TYPE_DETECTION == 'magic':
        return magic.from_buffer(media_data, mime=True)

    # mimetypes.guess_type() works from a file name, path or URL; it cannot
    # identify raw content. Prefer an explicit location and never hand it
    # the bytes themselves, so an undetectable type is always reported
    # through UnknownMediaException below.
    name = location if location is not None else media_data
    media_type = None
    if name is not None and not isinstance(name, (bytes, bytearray)):
        media_type = mimetypes.guess_type(name)[0]

    if not media_type:
        raise UnknownMediaException(
            'Could not detect file type. Please try installing '
            'libmagic: https://panoptes-python-client.readthedocs.'
            'io/en/latest/user_guide.html#uploading-non-image-'
            'media-types'
        )
    return media_type
233+
234+
def _validate_media_type(self, media_type=None):
    """
    Check that **media_type** is one of the MIME types in
    ``ALLOWED_MIME_TYPES``.

    Returns nothing when the type is allowed and raises
    :py:class:`UnknownMediaException` otherwise.
    """
    if media_type in ALLOWED_MIME_TYPES:
        return
    raise UnknownMediaException(f"File type {media_type} is not allowed.")
237+
208238
@property
209239
def async_save_result(self):
210240
"""
@@ -222,6 +252,19 @@ def async_save_result(self):
222252
else:
223253
return False
224254

255+
@property
def attached_images(self):
    """
    A dict containing the images/media attached to this subject.

    Attached images are NOT subject locations. A subject_location is a
    media record holding the location of the media that will be classified
    in a project's classifier. A subject_attached_image is a media record
    that serves as ancillary/auxiliary media to the subject and is shown
    on the subject's Talk page.

    Raises :py:class:`ObjectNotSavedException` if the subject has no id.
    """
    if self.id is not None:
        response = self.http_get('{}/attached_images'.format(self.id))
        return response[0]
    raise ObjectNotSavedException
267+
225268
def set_raw(self, raw, etag=None, loaded=True):
226269
super(Subject, self).set_raw(raw, etag, loaded)
227270
if loaded and self.metadata:
@@ -266,29 +309,165 @@ def add_location(self, location, manual_mimetype=None):
266309

267310
try:
268311
media_data = f.read()
269-
if manual_mimetype is not None:
270-
media_type = manual_mimetype
271-
elif MEDIA_TYPE_DETECTION == 'magic':
272-
media_type = magic.from_buffer(media_data, mime=True)
273-
else:
274-
media_type = mimetypes.guess_type(location)[0]
275-
if not media_type:
276-
raise UnknownMediaException(
277-
'Could not detect file type. Please try installing '
278-
'libmagic: https://panoptes-python-client.readthedocs.'
279-
'io/en/latest/user_guide.html#uploading-non-image-'
280-
'media-types'
281-
)
312+
media_type = self._detect_media_type(media_data, manual_mimetype)
282313

283-
if media_type not in ALLOWED_MIME_TYPES:
284-
raise UnknownMediaException(f"File type {media_type} is not allowed.")
314+
self._validate_media_type(media_type)
285315

286316
self.locations.append(media_type)
287317
self._media_files.append(media_data)
288318
self.modified_attributes.add('locations')
289319
finally:
290320
f.close()
291321

322+
def _add_attached_image(
    self,
    src=None,
    content_type='image/png',
    external_link=True,
    metadata=None,
    client=None,
):
    """
    Create one attached-image media record for this subject by POSTing to
    ``<id>/attached_images``.

    - **src** optional URL of the media; only sent when truthy.
    - **content_type** MIME type stored on the record.
    - **external_link** flag stored on the record.
    - **metadata** optional :py:class:`dict` stored on the record.
    - **client** optional client used as the context for the request;
      falls back to ``Panoptes.client()``.

    Returns the ``src`` of the first media record in the response.

    Raises :py:class:`ObjectNotSavedException` if the subject has no id.
    """
    if self.id is None:
        raise ObjectNotSavedException

    payload = {
        'content_type': content_type,
        'external_link': external_link,
        'metadata': metadata or {},
    }
    if src:
        payload['src'] = src

    active_client = client or Panoptes.client()
    with active_client:
        json_response, _ = self.http_post(
            '{}/attached_images'.format(self.id),
            json={'media': payload},
        )

    first_record = json_response['media'][0]
    return first_record['src']
348+
349+
def _save_attached_image(self, attached_media, manual_mimetype=None, metadata=None, client=None):
    """
    Create the attached-image record(s) for this subject and, for local
    media, upload the content.

    - **attached_media** a :py:class:`dict` mapping MIME types to URLs of
      externally hosted media, a path to a local file, or an open file
      object. A file object is closed once it has been read.
    - **manual_mimetype** optional string overriding MIME type detection
      for local media.
    - **metadata** optional :py:class:`dict` stored on each record.
    - **client** optional client used as the context for the requests;
      falls back to ``Panoptes.client()``.

    Returns nothing. Raises :py:class:`UnknownMediaException` when the
    media type of local media can't be detected or is not allowed.
    """
    if not client:
        client = Panoptes.client()

    with client:
        metadata = metadata or {}

        # isinstance() rather than an exact type match, so that dict and
        # str subclasses (e.g. OrderedDict) take the intended branch.
        if isinstance(attached_media, dict):
            # Externally hosted media: records only, nothing to upload.
            for content_type, url in attached_media.items():
                self._add_attached_image(
                    src=url,
                    content_type=content_type,
                    metadata=metadata,
                    external_link=True,
                    # Hand the client down explicitly instead of relying
                    # on the callee looking it up again.
                    client=client,
                )
            return

        if isinstance(attached_media, (str,) + _OLD_STR_TYPES):
            f = open(attached_media, 'rb')
        else:
            f = attached_media

        try:
            media_data = f.read()
            media_type = self._detect_media_type(media_data, manual_mimetype)
            self._validate_media_type(media_type)
        finally:
            f.close()

        # Create the record first; the URL it returns is where the
        # content is then uploaded.
        file_url = self._add_attached_image(
            src=None,
            content_type=media_type,
            metadata=metadata,
            external_link=False,
            client=client,
        )
        self._upload_media(file_url, media_data, media_type)
384+
385+
def save_attached_image(
    self,
    attached_media,
    manual_mimetype=None,
    metadata=None,
    client=None
):
    """
    Add an attached_media to this subject.
    NOTE: This should NOT be confused with subject location.
    A subject location is the content of the subject that a volunteer
    will classify.
    A subject attached_media is ancillary data associated with the subject
    that gets displayed on the Subject's Talk Page.

    - **attached_media** can be an open :py:class:`file` object, a path to a
      local file, or a :py:class:`dict` containing MIME types and URLs for
      remote media.
    - **manual_mimetype** optional string, passes in a specific MIME type
      for the media item.
    - **metadata** can be a :py:class:`dict` that stores additional info
      on attached_media.
    - **client** optional Panoptes.client() instance. Sent as a parameter
      so that the worker thread uses the correct client context.

    Returns the :py:class:`concurrent.futures.Future` of the submitted
    save. Call ``.result()`` on it to find out whether the save succeeded;
    any error raised while saving is re-raised from there. Outside of an
    `async_saves` block the save has already finished by the time this
    method returns.

    Examples::

        # Upload image by sending in a :py:class:`file` object
        subject.save_attached_image(my_file)
        # Upload local image by passing path to file
        subject.save_attached_image('/data/image.jpg')
        # Upload local image and set mimetype and record's metadata
        subject.save_attached_image(attached_media=my_file, manual_mimetype='image/jpg', metadata={'metadata_test': 'Object 1'})
        # Upload externally hosted image
        subject.save_attached_image({"image/png": "https://example.com/test.png"})

    We can utilize `async_saves` to upload/save attached_images in parallel.

    Examples::

        from concurrent.futures import as_completed
        subject = Subject(1234)

        # list of file locations
        local_files = [...]

        with Subject.async_saves():
            future_to_file = {subject.save_attached_image(file_location): file_location for file_location in local_files}
            for future in as_completed(future_to_file):
                local_file = future_to_file[future]
                try:
                    future.result()
                except Exception as exc:
                    print(f"Upload failed for {local_file}: {exc}")

    """
    if not client:
        client = Panoptes.client()

    # Inside `async_saves` a shared executor is present on the local
    # storage; otherwise a throwaway pool is used for this one save.
    async_save = hasattr(self._local, 'save_exec')

    with client:
        metadata = metadata or {}

        # Obtain the executor before entering the try block, so that the
        # cleanup below never runs against an executor that failed to be
        # created.
        if async_save:
            upload_exec = self._local.save_exec
        else:
            upload_exec = ThreadPoolExecutor(max_workers=ASYNC_SAVE_THREADS)

        try:
            return upload_exec.submit(
                retry,
                self._save_attached_image,
                args=(
                    attached_media,
                    manual_mimetype,
                    metadata,
                    client
                ),
                attempts=UPLOAD_RETRY_LIMIT,
                sleeptime=RETRY_BACKOFF_INTERVAL,
                # Trailing comma makes this an actual one-element tuple.
                retry_exceptions=(
                    requests.exceptions.RequestException,
                ),
                log_args=False,
            )
        finally:
            if not async_save:
                # Shuts down and waits for the task if this isn't being used in a `async_saves` block
                upload_exec.shutdown(wait=True)
470+
292471

293472
class UnknownMediaException(Exception):
    """Raised when a media type cannot be detected or is not allowed."""

0 commit comments

Comments
 (0)