Skip to content

Commit b00dc30

Browse files
authored
Merge pull request #137 from jlwalke2/master
Image DataMsgHandler
2 parents 0a2e93b + 1e94bc1 commit b00dc30

File tree

2 files changed

+169
-2
lines changed

2 files changed

+169
-2
lines changed

doc/source/api.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -847,6 +847,7 @@ handle the piece-wise loading of data into the server.
847847
Excel
848848
Clipboard
849849
DBAPI
850+
Image
850851

851852

852853
Date and Time Functions

swat/cas/datamsghandlers.py

Lines changed: 168 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,12 +22,15 @@
2222
'''
2323

2424
from __future__ import print_function, division, absolute_import, unicode_literals
25+
from glob import glob
2526

2627
import base64
2728
import copy
28-
import pytz
29-
import re
3029
import datetime
30+
import io
31+
import os
32+
import re
33+
import sys
3134
import warnings
3235
import numpy as np
3336
import pandas as pd
@@ -51,6 +54,10 @@
5154
from ..utils import getsoptions
5255
from .connection import getone, CASRequest, CASResponse
5356

57+
try:
58+
import PIL
59+
except ImportError:
60+
PIL = None
5461

5562
_SIZES = {
5663
'char': 1,
@@ -1173,3 +1180,162 @@ def getrow(self, row):
11731180
del self._firstrow
11741181
return row
11751182
return self.cursor.fetchone()
1183+
1184+
1185+
class Image(CASDataMsgHandler):
    """CAS data message handler for images.

    Each image becomes one row with the columns ``_image_`` (raw encoded
    bytes), ``_label_``, ``_size_`` (number of bytes), ``_path_``, ``_type_``
    (lowercase file extension without the dot) and ``_id_`` (1-based row number).

    Parameters
    ----------
    data : str, pathlib.Path, or iterable of images
        Multiple formats are supported for providing images:
            - str or :class:`pathlib.Path`
                Path to a directory containing one or more images.
            - iterable of str or :class:`pathlib.Path`
                a list of file paths specifying the location of each image.
            - iterable of :class:`numpy.ndarray`
                a list of arrays where each array contains the pixel values for the image.
                Arrays should be (height, width) or (height, width, 3). Channel order is
                assumed to be RGB.
            - iterable of :class:`PIL.Image.Image`
                a list of Pillow Image objects.
    nrecs : int, optional
        The number of rows to allocate in the buffer.  This can be
        smaller than the number of total rows since they are uploaded
        in batches `nrecs` long.
    subdirs : bool, optional
        Whether to search subdirectories for additional images.  Only applies when
        `data` is a path to a directory.  If images are read from subdirectories, the
        name of the subdirectory will be used as the image class label.

    See Also
    --------
    :class:`CASDataMsgHandler`
    :class:`numpy.ndarray`
    :class:`PIL.Image.Image`

    Notes
    -----
    When using this data message handler to upload client-side images to CAS, the
    behavior should be similar to that of the image.loadImages_ CAS action for loading
    server-side images:

    .. _image.loadImages: https://go.documentation.sas.com/doc/en/pgmsascdc/v_028/casactml/casactml_image_details22.htm  # noqa: E501

    Although images will be stored in binary format to a CAS table column
    labeled "_image_", the CAS table metadata will not indicate that this column should
    be interpreted as images.  Use the altertable CAS action to update the column's type:

    >>> conn.addtable(table='mytable', **imagedmh.args.addtable)
    >>> conn.altertable(table='mytable', columns=[{'name': '_image_', 'binaryType': 'image'}])

    Examples
    --------
    >>> dmh = Image('/path/to/images')
    >>> conn.addtable(table='mytable', **dmh.args.addtable).casTable
    ... CASTable('MYTABLE', caslib='CASUSER(user)')

    """

    # Lowercase file extensions (without the dot) searched for when `data`
    # is a directory.
    _EXTENSIONS = ('bmp', 'dib', 'jpg', 'jpeg', 'jpe', 'jp2', 'png', 'pbm', 'pgm',
                   'ppm', 'tif', 'tiff', 'webp')

    def __init__(self, data, nrecs=1000, subdirs=True):
        # To maintain Py2.7 compatibility, use strings instead of Paths.
        data = self._path_to_str(data)

        if isinstance(data, str):
            extensions = list(self._EXTENSIONS)

            # Also search for uppercase file extensions if not running on a
            # case-insensitive OS (Windows).
            if not sys.platform.startswith('win'):
                extensions += [ext.upper() for ext in extensions]

            # Search for all images in the directory and (optionally) in
            # subdirectories.  With recursive globbing, '**' also matches zero
            # directories, so files directly inside `data` are included too.
            files = []
            for extension in extensions:
                if subdirs:
                    pattern = os.path.join(data, '**', '*.%s' % extension)
                else:
                    pattern = os.path.join(data, '*.%s' % extension)
                files.extend(glob(pattern, recursive=subdirs))
            self._data = files
        else:
            # Materialize the iterable so rows can be fetched by index.
            self._data = list(data)

        self._subdirs = subdirs

        variables = [
            dict(name='_image_', rtype='CHAR', type='VARBINARY'),
            dict(name='_label_', rtype='CHAR', type='VARCHAR'),
            dict(name='_size_', rtype='NUMERIC', type='INT64'),
            dict(name='_path_', rtype='CHAR', type='VARCHAR'),
            dict(name='_type_', rtype='CHAR', type='VARCHAR'),
            dict(name='_id_', rtype='NUMERIC', type='INT64')
        ]

        super(Image, self).__init__(variables, nrecs=nrecs)

    @staticmethod
    def _path_to_str(obj):
        """Return ``str(obj)`` for pathlib path objects, otherwise `obj` unchanged."""
        # Compare only the top-level package name so that path classes defined
        # in a submodule of ``pathlib`` are recognized as well.
        module = getattr(type(obj), '__module__', None) or ''
        if module.partition('.')[0] == 'pathlib':
            return str(obj)
        return obj

    @staticmethod
    def _encode_record(record, row):
        """Encode an in-memory image as bytes.

        Parameters
        ----------
        record : numpy.ndarray or PIL.Image.Image
            The image to encode.
        row : int
            Zero-based row index, used to synthesize a file name when the
            image does not carry one.

        Returns
        -------
        (bytes, str)
            The encoded image and the path to report for it.

        Raises
        ------
        RuntimeError
            If the Pillow package is not installed.
        TypeError
            If `record` is neither a Numpy array nor a Pillow image.

        """
        # PIL package is required to format data as an image.
        if PIL is None:
            raise RuntimeError(
                'Formatting data as images requires the Pillow package '
                '(https://pypi.org/project/Pillow/).')

        # ``import PIL`` alone does not guarantee that the ``PIL.Image``
        # submodule is loaded, so import it explicitly here.
        from PIL import Image as pil_image

        # Convert Numpy array to Image
        if isinstance(record, np.ndarray):
            record = pil_image.fromarray(record)

        if not isinstance(record, pil_image.Image):
            raise TypeError(
                'Unsupported image type %r at row %d; expected a file path, '
                'numpy.ndarray, or PIL.Image.Image.'
                % (type(record).__name__, row))

        # Images loaded from disk normally carry their source format and file
        # name; keep the original encoding when it is known, otherwise use PNG.
        image_format = getattr(record, 'format', None) or 'png'
        filename = getattr(record, 'filename', None)

        buffer = io.BytesIO()
        record.save(buffer, format=image_format)

        if not filename:
            # No usable file name (in-memory image or image opened from a
            # stream): synthesize one whose extension matches the encoding.
            filename = 'Image_%d.%s' % (row + 1, image_format.lower())

        return buffer.getvalue(), filename

    def getrow(self, row):
        """Get a row of values from the data source

        Parameters
        ----------
        row : int
            The row index to return.

        Returns
        -------
        list-of-any
            One row of data values in the order
            ``[image, label, size, path, type, id]``, or None when `row` is
            past the end of the data.

        Raises
        ------
        RuntimeError
            If an in-memory image must be encoded and Pillow is not installed.
        TypeError
            If the record is not a path, Numpy array, or Pillow image.

        """
        if row >= len(self._data):
            return

        # Convert Path instances to str for Py2.7 compatibility.
        record = self._path_to_str(self._data[row])

        if isinstance(record, str):
            # Input is path to an image on disk.  Can just read bytes directly.
            with open(record, 'rb') as f:
                image = f.read()
            path = record
        else:
            image, path = self._encode_record(record, row)

        # Use folder name if images loaded from subdirectories
        label = os.path.basename(os.path.dirname(path)) if self._subdirs else ''

        image_type = os.path.splitext(path)[-1].lower().lstrip('.')
        size = len(image)

        return [image, label, size, path, image_type, row + 1]

0 commit comments

Comments
 (0)