|
22 | 22 | '''
|
23 | 23 |
|
24 | 24 | from __future__ import print_function, division, absolute_import, unicode_literals
|
| 25 | +from glob import glob |
25 | 26 |
|
26 | 27 | import base64
|
27 | 28 | import copy
|
28 |
| -import pytz |
29 |
| -import re |
30 | 29 | import datetime
|
| 30 | +import io |
| 31 | +import os |
| 32 | +import re |
| 33 | +import sys |
31 | 34 | import warnings
|
32 | 35 | import numpy as np
|
33 | 36 | import pandas as pd
|
|
51 | 54 | from ..utils import getsoptions
|
52 | 55 | from .connection import getone, CASRequest, CASResponse
|
53 | 56 |
|
| 57 | +try: |
| 58 | + import PIL |
| 59 | +except ImportError: |
| 60 | + PIL = None |
54 | 61 |
|
55 | 62 | _SIZES = {
|
56 | 63 | 'char': 1,
|
@@ -1173,3 +1180,162 @@ def getrow(self, row):
|
1173 | 1180 | del self._firstrow
|
1174 | 1181 | return row
|
1175 | 1182 | return self.cursor.fetchone()
|
| 1183 | + |
| 1184 | + |
class Image(CASDataMsgHandler):
    """CAS data message handler for images.

    Parameters
    ----------
    data : str, pathlib.Path, or iterable of images
        Multiple formats are supported for providing images:
            - str or :class:`pathlib.Path`
                Path to a directory containing one or more images.
            - iterable of str or :class:`pathlib.Path`
                a list of file paths specifying the location of each image.
            - iterable of :class:`numpy.ndarray`
                a list of arrays where each array contains the pixel values for the image.
                Arrays should be (height, width) or (height, width, 3).  Channel order is
                assumed to be RGB.
            - iterable of :class:`PIL.Image.Image`
                a list of Pillow Image objects.
    nrecs : int, optional
        The number of rows to allocate in the buffer.  This can be
        smaller than the total number of rows since they are uploaded
        in batches `nrecs` long.
    subdirs : bool, optional
        Whether to search subdirectories for additional images.  The search only
        applies when `data` is a path to a directory.  When true, the ``_label_``
        value of every row is the name of the directory that contains the image
        (empty for in-memory images that have no known location on disk).

    Raises
    ------
    RuntimeError
        From :meth:`getrow`, if an in-memory image must be encoded but the
        Pillow package is not installed.
    TypeError
        From :meth:`getrow`, if a record is not a path, a Numpy array, or a
        Pillow image.

    See Also
    --------
    :class:`CASDataMsgHandler`
    :class:`numpy.ndarray`
    :class:`PIL.Image.Image`

    Notes
    -----
    When using this data message handler to upload client-side images to CAS, the
    behavior should be similar to that of the image.loadImages_ CAS action for loading
    server-side images:

    .. _image.loadImages: https://go.documentation.sas.com/doc/en/pgmsascdc/v_028/casactml/casactml_image_details22.htm  # noqa: E501

    Although images will be stored in binary format to a CAS table column
    labeled "_image_", the CAS table metadata will not indicate that this column should
    be interpreted as images.  Use the altertable CAS action to update the column's type:

    >>> conn.addtable(table='mytable', **imagedmh.args.addtable)
    >>> conn.altertable(table='mytable', columns=[{'name': '_image_', 'binaryType': 'image'}])

    Examples
    --------
    >>> dmh = Image('/path/to/images')
    >>> conn.addtable(table='mytable', **dmh.args.addtable).casTable
    ... CASTable('MYTABLE', caslib='CASUSER(user)')

    """

    # Lowercase file extensions (without the dot) that are collected when
    # `data` is a directory.
    _EXTENSIONS = ('bmp', 'dib', 'jpg', 'jpeg', 'jpe', 'jp2', 'png', 'pbm', 'pgm',
                   'ppm', 'tif', 'tiff', 'webp')

    def __init__(self, data, nrecs=1000, subdirs=True):
        # To maintain Py2.7 compatibility, use strings instead of Paths.
        data = self._to_path_str(data)

        if isinstance(data, str):
            # Imported locally because only the ``glob`` function is imported
            # at module level.
            from glob import escape as glob_escape

            extensions = list(self._EXTENSIONS)

            # Also search for uppercase file extensions if not running on a
            # case-insensitive OS (Windows).
            if not sys.platform.startswith('win'):
                extensions += [x.upper() for x in extensions]

            # Escape the directory so that characters such as "[" in its name
            # are matched literally instead of being treated as wildcards.
            root = glob_escape(data)

            # Search for all images in the directory and (optionally) in subdirectories
            files = []
            for extension in extensions:
                if subdirs:
                    pattern = os.path.join(root, '**', '*.%s' % extension)
                else:
                    pattern = os.path.join(root, '*.%s' % extension)

                files.extend(glob(pattern, recursive=subdirs))
            self._data = files
        else:
            self._data = list(data)

        self._subdirs = subdirs

        variables = [
            dict(name='_image_', rtype='CHAR', type='VARBINARY'),
            dict(name='_label_', rtype='CHAR', type='VARCHAR'),
            dict(name='_size_', rtype='NUMERIC', type='INT64'),
            dict(name='_path_', rtype='CHAR', type='VARCHAR'),
            dict(name='_type_', rtype='CHAR', type='VARCHAR'),
            dict(name='_id_', rtype='NUMERIC', type='INT64')
        ]

        super(Image, self).__init__(variables, nrecs=nrecs)

    @staticmethod
    def _to_path_str(obj):
        """Return path-like objects as plain strings; return anything else unchanged."""
        # Objects implementing the file system path protocol.
        fspath = getattr(obj, '__fspath__', None)
        if fspath is not None:
            return fspath()

        # Path classes that do not implement ``__fspath__``.
        if type(obj).__module__.split('.')[0] == 'pathlib':
            return str(obj)

        return obj

    def getrow(self, row):
        """Get a row of values from the data source

        Parameters
        ----------
        row : int
            The row index to return.

        Returns
        -------
        list-of-any
            One row of data values, in the order ``_image_``, ``_label_``,
            ``_size_``, ``_path_``, ``_type_``, ``_id_``.  None is returned
            when `row` is past the last image.

        Raises
        ------
        RuntimeError
            If the record must be encoded as an image and Pillow is not installed.
        TypeError
            If the record is not a path, a Numpy array, or a Pillow image.

        """
        if row >= len(self._data):
            return

        # Convert Path instances to str for Py2.7 compatibility.
        record = self._to_path_str(self._data[row])

        # Default value.  Will be overridden if disk location is known.
        path = 'Image_%d.png' % (row + 1)

        # Input is path to an image on disk.  Can just read bytes directly.
        if isinstance(record, str):
            with open(record, 'rb') as f:
                image = f.read()
            path = record
        else:
            # Otherwise, PIL package is required to format data as an image.
            if PIL is None:
                raise RuntimeError(
                    'Formatting data as images requires the Pillow package '
                    '(https://pypi.org/project/Pillow/).')

            # ``import PIL`` by itself does not load the ``PIL.Image``
            # submodule, so import it explicitly.  An alias is used because
            # this class is also named ``Image``.
            from PIL import Image as pil_image

            # Convert Numpy array to Image
            if isinstance(record, np.ndarray):
                record = pil_image.fromarray(record)

            if not isinstance(record, pil_image.Image):
                raise TypeError(
                    'Unsupported image record of type %s; expected a file path, '
                    'a numpy.ndarray, or a PIL.Image.Image.' % type(record).__name__)

            # Keep the encoding the image was loaded with when it is known,
            # otherwise encode as PNG.
            image_format = getattr(record, 'format', None) or 'png'
            buffer = io.BytesIO()
            record.save(buffer, format=image_format)
            image = buffer.getvalue()

            # If image was loaded from disk it may have attribute with filename
            filename = getattr(record, 'filename', None)
            if filename:
                path = filename
            else:
                path = 'Image_%d.%s' % (row + 1, image_format.lower())

        # Use folder name if images loaded from subdirectories
        label = os.path.basename(os.path.dirname(path)) if self._subdirs else ''

        image_type = os.path.splitext(path)[-1].lower().lstrip('.')
        size = len(image)

        return [image, label, size, path, image_type, row + 1]
0 commit comments