27
27
import base64
28
28
import copy
29
29
import io
30
- import pytz
30
+ import os
31
31
import re
32
32
import datetime
33
33
import warnings
52
52
from ..dataframe import SASDataFrame
53
53
from ..utils import getsoptions
54
54
from .connection import getone , CASRequest , CASResponse
55
+
55
56
try :
56
57
import PIL
57
58
except ImportError :
@@ -1181,7 +1182,7 @@ def getrow(self, row):
1181
1182
1182
1183
1183
1184
class Image (CASDataMsgHandler ):
1184
- ''' CAS data message handler for images.
1185
+ """ CAS data message handler for images.
1185
1186
1186
1187
Parameters
1187
1188
----------
@@ -1193,16 +1194,18 @@ class Image(CASDataMsgHandler):
1193
1194
a list of file paths specifying the location of each image.
1194
1195
- iterable of :class:`numpy.ndarray`
1195
1196
a list of arrays where each array contains the pixel values for the image.
1196
- Arrays should be (height, width) or (height, width, 3). Channel order is assumed to be RGB.
1197
+ Arrays should be (height, width) or (height, width, 3). Channel order is
1198
+ assumed to be RGB.
1197
1199
- iterable of :class:`PIL.Image.Image`
1198
1200
a list of Pillow Image objects.
1199
1201
nrecs : int, optional
1200
1202
The number of rows to allocate in the buffer. This can be
1201
1203
smaller than the number of total rows since they are uploaded
1202
1204
in batches `nrecs` long.
1203
1205
subdirs : bool, optional
1204
- Whether to search subdirectories for additional images. Only applies when `data` is a path to a directory.
1205
- If images are read from subdirectories, the name of the subdirectory will be used as the image class label.
1206
+ Whether to search subdirectories for additional images. Only applies when
1207
+ `data` is a path to a directory. If images are read from subdirectories, the
1208
+ name of the subdirectory will be used as the image class label.
1206
1209
1207
1210
See Also
1208
1211
--------
@@ -1212,15 +1215,15 @@ class Image(CASDataMsgHandler):
1212
1215
1213
1216
Notes
1214
1217
-----
1215
- When using this data message handler to upload client-side images to CAS, the behavior should be similar to that
1216
- of the image.loadImages_ CAS action for loading server-side images:
1217
-
1218
- .. _image.loadImages: https://go.documentation.sas.com/doc/en/pgmsascdc/v_028/casactml/casactml_image_details22.htm
1218
+ When using this data message handler to upload client-side images to CAS, the
1219
+ behavior should be similar to that of the image.loadImages_ CAS action for loading
1220
+ server-side images:
1219
1221
1222
+ .. _image.loadImages: https://go.documentation.sas.com/doc/en/pgmsascdc/v_028/casactml/casactml_image_details22.htm # noqa: E501
1220
1223
1221
- Although images will be stored in binary format to a CAS table column labeled "_image_", the CAS table metadata will
1222
- not indicate that this column should be interpreted as images. Use the altertable CAS action to update the
1223
- column's type:
1224
+ Although images will be stored in binary format to a CAS table column
1225
+ labeled "_image_", the CAS table metadata will not indicate that this column should
1226
+ be interpreted as images. Use the altertable CAS action to update the column's type:
1224
1227
1225
1228
>>> conn.addtable(table='mytable', **imagedmh.args.addtable)
1226
1229
>>> conn.altertable(table='mytable', columns=[{'name': '_image_', 'binaryType': 'image'}])
@@ -1231,7 +1234,7 @@ class Image(CASDataMsgHandler):
1231
1234
>>> conn.addtable(table='mytable', **dmh.args.addtable).casTable
1232
1235
... CASTable('MYTABLE', caslib='CASUSER(user)')
1233
1236
1234
- '''
1237
+ """
1235
1238
1236
1239
def __init__ (self , data , nrecs = 1000 , subdirs = True ):
1237
1240
if isinstance (data , (str , Path )):
@@ -1240,32 +1243,32 @@ def __init__(self, data, nrecs=1000, subdirs=True):
1240
1243
1241
1244
# Search for all images in the directory and (optionally) in subdirectories
1242
1245
for extension in (
1243
- 'bmp' , 'dib' , 'jpg' , 'jpeg' , 'jpe' , 'jp2' , 'png' , 'pbm' , 'pmg' , 'ppm' , 'tif' , 'tiff' , 'webp' ):
1246
+ 'bmp' , 'dib' , 'jpg' , 'jpeg' , 'jpe' , 'jp2' , 'png' , 'pbm' , 'pmg' , 'ppm' ,
1247
+ 'tif' , 'tiff' , 'webp' ):
1248
+
1244
1249
if subdirs :
1245
1250
files .extend (path .glob (f'**/*.{ extension } ' ))
1246
- files .extend (path .glob (f'**/*.{ extension .upper ()} ' ))
1247
1251
else :
1248
1252
files .extend (path .glob (f'*.{ extension } ' ))
1249
- files .extend (path .glob (f'*.{ extension .upper ()} ' ))
1250
1253
self ._data = files
1251
1254
else :
1252
1255
self ._data = list (data )
1253
1256
1254
1257
self ._subdirs = subdirs
1255
1258
1256
1259
variables = [
1257
- dict (name = '_image_' , rtype = 'CHAR' , type = 'VARBINARY' , offset = 0 , length = 16 ),
1258
- dict (name = '_label_' , rtype = 'CHAR' , type = 'VARCHAR' , offset = 16 , length = 16 ),
1259
- dict (name = '_filename_0' , rtype = 'CHAR' , type = 'VARCHAR' , offset = 32 , length = 16 ),
1260
- dict (name = '_path_' , rtype = 'CHAR' , type = 'VARCHAR' , offset = 48 , length = 16 ),
1261
- dict (name = '_id_' , rtype = 'NUMERIC' , type = 'INT64' , offset = 64 , length = 8 ),
1260
+ dict (name = '_image_' , rtype = 'CHAR' , type = 'VARBINARY' ),
1261
+ dict (name = '_label_' , rtype = 'CHAR' , type = 'VARCHAR' ),
1262
+ dict (name = '_size_' , rtype = 'NUMERIC' , type = 'INT64' ),
1263
+ dict (name = '_path_' , rtype = 'CHAR' , type = 'VARCHAR' ),
1264
+ dict (name = '_type_' , rtype = 'CHAR' , type = 'VARCHAR' ),
1265
+ dict (name = '_id_' , rtype = 'NUMERIC' , type = 'INT64' )
1262
1266
]
1263
- reclen = sum ([variable ['length' ] for variable in variables ])
1264
1267
1265
- super (Image , self ).__init__ (variables , nrecs = nrecs , reclen = reclen )
1268
+ super (Image , self ).__init__ (variables , nrecs = nrecs )
1266
1269
1267
1270
def getrow (self , row ):
1268
- ''' Get a row of values from the data source
1271
+ """ Get a row of values from the data source
1269
1272
1270
1273
Parameters
1271
1274
----------
@@ -1277,35 +1280,26 @@ def getrow(self, row):
1277
1280
list-of-any
1278
1281
One row of data values
1279
1282
1280
- Raises
1281
- ------
1282
- RuntimeError
1283
- If processing Numpy arrays and :mod:`PIL` package is not installed.
1284
-
1285
- '''
1283
+ """
1286
1284
if row >= len (self ._data ):
1287
1285
return
1288
1286
1289
1287
record = self ._data [row ]
1290
- label = ''
1291
- filename = 'Image_%d.png' % ( row + 1 )
1292
- path = filename
1288
+
1289
+ # Default value. Will be overridden if disk location is known.
1290
+ path = 'Image_%d.png' % ( row + 1 )
1293
1291
1294
1292
# Input is path to an image on disk. Can just read bytes directly.
1295
1293
if isinstance (record , (str , Path )):
1296
1294
with open (record , 'rb' ) as f :
1297
1295
image = f .read ()
1298
- filename = Path (record ).name
1299
1296
path = str (record )
1300
-
1301
- # If images were pulled from subdirectories, then the image label is the directory name.
1302
- if self ._subdirs :
1303
- label = Path (record ).parent .name
1304
1297
else :
1305
1298
# Otherwise, PIL package is required to format data as an image.
1306
1299
if PIL is None :
1307
1300
raise RuntimeError (
1308
- 'Formatting data as images requires the Pillow package (https://pypi.org/project/Pillow/).' )
1301
+ 'Formatting data as images requires the Pillow package '
1302
+ '(https://pypi.org/project/Pillow/).' )
1309
1303
1310
1304
# Convert Numpy array to Image
1311
1305
if isinstance (record , np .ndarray ):
@@ -1314,8 +1308,20 @@ def getrow(self, row):
1314
1308
# Get bytes from Image instance
1315
1309
if isinstance (record , PIL .Image .Image ):
1316
1310
buffer = io .BytesIO ()
1317
- record .save (buffer , format = 'png' )
1311
+
1312
+ # If image was loaded from disk it may have attribute with filename
1313
+ if hasattr (record , 'filename' ):
1314
+ record .save (buffer , format = record .format )
1315
+ path = record .filename
1316
+ else :
1317
+ record .save (buffer , format = 'png' )
1318
1318
buffer .seek (0 )
1319
1319
image = buffer .read ()
1320
1320
1321
- return [image , label , filename , path , row + 1 ]
1321
+ # Use folder name if images loaded from subdirectories
1322
+ label = os .path .basename (os .path .dirname (path )) if self ._subdirs else ''
1323
+
1324
+ image_type = os .path .splitext (path )[- 1 ].lower ().lstrip ('.' )
1325
+ size = len (image )
1326
+
1327
+ return [image , label , size , path , image_type , row + 1 ]
0 commit comments