Skip to content

Commit bce7040

Browse files
committed
Added _path_ and _size_ columns and removed _filename_0.
No longer search for uppercase file extensions. Retain PIL.Image filename info if available. Misc flake8 cleanup. Signed-off-by: Jonathan Walker <[email protected]>
1 parent 0890b0c commit bce7040

File tree

1 file changed

+47
-41
lines changed

1 file changed

+47
-41
lines changed

swat/cas/datamsghandlers.py

Lines changed: 47 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
import base64
2828
import copy
2929
import io
30-
import pytz
30+
import os
3131
import re
3232
import datetime
3333
import warnings
@@ -52,6 +52,7 @@
5252
from ..dataframe import SASDataFrame
5353
from ..utils import getsoptions
5454
from .connection import getone, CASRequest, CASResponse
55+
5556
try:
5657
import PIL
5758
except ImportError:
@@ -1181,7 +1182,7 @@ def getrow(self, row):
11811182

11821183

11831184
class Image(CASDataMsgHandler):
1184-
'''CAS data message handler for images.
1185+
"""CAS data message handler for images.
11851186
11861187
Parameters
11871188
----------
@@ -1193,16 +1194,18 @@ class Image(CASDataMsgHandler):
11931194
a list of file paths specifying the location of each image.
11941195
- iterable of :class:`numpy.ndarray`
11951196
a list of arrays where each array contains the pixel values for the image.
1196-
Arrays should be (height, width) or (height, width, 3). Channel order is assumed to be RGB.
1197+
Arrays should be (height, width) or (height, width, 3). Channel order is
1198+
assumed to be RGB.
11971199
- iterable of :class:`PIL.Image.Image`
11981200
a list of Pillow Image objects.
11991201
nrecs : int, optional
12001202
The number of rows to allocate in the buffer. This can be
12011203
smaller than the number of total rows since they are uploaded
12021204
in batches `nrecs` long.
12031205
subdirs : bool, optional
1204-
Whether to search subdirectories for additional images. Only applies when `data` is a path to a directory.
1205-
If images are read from subdirectories, the name of the subdirectory will be used as the image class label.
1206+
Whether to search subdirectories for additional images. Only applies when
1207+
`data` is a path to a directory. If images are read from subdirectories, the
1208+
name of the subdirectory will be used as the image class label.
12061209
12071210
See Also
12081211
--------
@@ -1212,15 +1215,15 @@ class Image(CASDataMsgHandler):
12121215
12131216
Notes
12141217
-----
1215-
When using this data message handler to upload client-side images to CAS, the behavior should be similar to that
1216-
of the image.loadImages_ CAS action for loading server-side images:
1217-
1218-
.. _image.loadImages: https://go.documentation.sas.com/doc/en/pgmsascdc/v_028/casactml/casactml_image_details22.htm
1218+
When using this data message handler to upload client-side images to CAS, the
1219+
behavior should be similar to that of the image.loadImages_ CAS action for loading
1220+
server-side images:
12191221
1222+
.. _image.loadImages: https://go.documentation.sas.com/doc/en/pgmsascdc/v_028/casactml/casactml_image_details22.htm # noqa: E501
12201223
1221-
Although images will be stored in binary format to a CAS table column labeled "_image_", the CAS table metadata will
1222-
not indicate that this column should be interpreted as images. Use the altertable CAS action to update the
1223-
column's type:
1224+
Although images will be stored in binary format to a CAS table column
1225+
labeled "_image_", the CAS table metadata will not indicate that this column should
1226+
be interpreted as images. Use the altertable CAS action to update the column's type:
12241227
12251228
>>> conn.addtable(table='mytable', **imagedmh.args.addtable)
12261229
>>> conn.altertable(table='mytable', columns=[{'name': '_image_', 'binaryType': 'image'}])
@@ -1231,7 +1234,7 @@ class Image(CASDataMsgHandler):
12311234
>>> conn.addtable(table='mytable', **dmh.args.addtable).casTable
12321235
... CASTable('MYTABLE', caslib='CASUSER(user)')
12331236
1234-
'''
1237+
"""
12351238

12361239
def __init__(self, data, nrecs=1000, subdirs=True):
12371240
if isinstance(data, (str, Path)):
@@ -1240,32 +1243,32 @@ def __init__(self, data, nrecs=1000, subdirs=True):
12401243

12411244
# Search for all images in the directory and (optionally) in subdirectories
12421245
for extension in (
1243-
'bmp', 'dib', 'jpg', 'jpeg', 'jpe', 'jp2', 'png', 'pbm', 'pmg', 'ppm', 'tif', 'tiff', 'webp'):
1246+
'bmp', 'dib', 'jpg', 'jpeg', 'jpe', 'jp2', 'png', 'pbm', 'pmg', 'ppm',
1247+
'tif', 'tiff', 'webp'):
1248+
12441249
if subdirs:
12451250
files.extend(path.glob(f'**/*.{extension}'))
1246-
files.extend(path.glob(f'**/*.{extension.upper()}'))
12471251
else:
12481252
files.extend(path.glob(f'*.{extension}'))
1249-
files.extend(path.glob(f'*.{extension.upper()}'))
12501253
self._data = files
12511254
else:
12521255
self._data = list(data)
12531256

12541257
self._subdirs = subdirs
12551258

12561259
variables = [
1257-
dict(name='_image_', rtype='CHAR', type='VARBINARY', offset=0, length=16),
1258-
dict(name='_label_', rtype='CHAR', type='VARCHAR', offset=16, length=16),
1259-
dict(name='_filename_0', rtype='CHAR', type='VARCHAR', offset=32, length=16),
1260-
dict(name='_path_', rtype='CHAR', type='VARCHAR', offset=48, length=16),
1261-
dict(name='_id_', rtype='NUMERIC', type='INT64', offset=64, length=8),
1260+
dict(name='_image_', rtype='CHAR', type='VARBINARY'),
1261+
dict(name='_label_', rtype='CHAR', type='VARCHAR'),
1262+
dict(name='_size_', rtype='NUMERIC', type='INT64'),
1263+
dict(name='_path_', rtype='CHAR', type='VARCHAR'),
1264+
dict(name='_type_', rtype='CHAR', type='VARCHAR'),
1265+
dict(name='_id_', rtype='NUMERIC', type='INT64')
12621266
]
1263-
reclen = sum([variable['length'] for variable in variables])
12641267

1265-
super(Image, self).__init__(variables, nrecs=nrecs, reclen=reclen)
1268+
super(Image, self).__init__(variables, nrecs=nrecs)
12661269

12671270
def getrow(self, row):
1268-
'''Get a row of values from the data source
1271+
"""Get a row of values from the data source
12691272
12701273
Parameters
12711274
----------
@@ -1277,35 +1280,26 @@ def getrow(self, row):
12771280
list-of-any
12781281
One row of data values
12791282
1280-
Raises
1281-
------
1282-
RuntimeError
1283-
If processing Numpy arrays and :mod:`PIL` package is not installed.
1284-
1285-
'''
1283+
"""
12861284
if row >= len(self._data):
12871285
return
12881286

12891287
record = self._data[row]
1290-
label = ''
1291-
filename = 'Image_%d.png' % (row + 1)
1292-
path = filename
1288+
1289+
# Default value. Will be overridden if disk location is known.
1290+
path = 'Image_%d.png' % (row + 1)
12931291

12941292
# Input is path to an image on disk. Can just read bytes directly.
12951293
if isinstance(record, (str, Path)):
12961294
with open(record, 'rb') as f:
12971295
image = f.read()
1298-
filename = Path(record).name
12991296
path = str(record)
1300-
1301-
# If images were pulled from subdirectories, then the image label is the directory name.
1302-
if self._subdirs:
1303-
label = Path(record).parent.name
13041297
else:
13051298
# Otherwise, PIL package is required to format data as an image.
13061299
if PIL is None:
13071300
raise RuntimeError(
1308-
'Formatting data as images requires the Pillow package (https://pypi.org/project/Pillow/).')
1301+
'Formatting data as images requires the Pillow package '
1302+
'(https://pypi.org/project/Pillow/).')
13091303

13101304
# Convert Numpy array to Image
13111305
if isinstance(record, np.ndarray):
@@ -1314,8 +1308,20 @@ def getrow(self, row):
13141308
# Get bytes from Image instance
13151309
if isinstance(record, PIL.Image.Image):
13161310
buffer = io.BytesIO()
1317-
record.save(buffer, format='png')
1311+
1312+
# An image loaded from disk may carry a `filename` attribute
1313+
if hasattr(record, 'filename'):
1314+
record.save(buffer, format=record.format)
1315+
path = record.filename
1316+
else:
1317+
record.save(buffer, format='png')
13181318
buffer.seek(0)
13191319
image = buffer.read()
13201320

1321-
return [image, label, filename, path, row + 1]
1321+
# Use folder name if images loaded from subdirectories
1322+
label = os.path.basename(os.path.dirname(path)) if self._subdirs else ''
1323+
1324+
image_type = os.path.splitext(path)[-1].lower().lstrip('.')
1325+
size = len(image)
1326+
1327+
return [image, label, size, path, image_type, row + 1]

0 commit comments

Comments
 (0)