@@ -2183,6 +2183,181 @@ def getDatasetValuesByUuid(self, obj_uuid, slices=Ellipsis, format="json"):
2183
2183
values = values .tobytes ()
2184
2184
2185
2185
return values
2186
+
2187
+ """
2188
+ doDatasetQueryByUuid: return rows based on query string
2189
+ Return rows from a dataset that matches query string.
2190
+
2191
+ Note: Only supported for compound_type/one-dimensional datasets
2192
+ """
2193
def doDatasetQueryByUuid(self, obj_uuid, query, start=0, stop=-1, step=1, limit=None):
    """Return the rows of a dataset that match a query string.

    The dataset is read in blocks (see _getBlockSize) and each block is
    filtered with the numpy expression built by _getEvalStr.

    Parameters:
        obj_uuid: uuid of the dataset to query.
        query: query expression over the dataset's field names.
        start: index of the first row to consider.
        stop: index one past the last row to consider; -1 (or any value
            beyond the dataset extent) means "to the end of the dataset".
        step: stride between the rows that are considered (must be >= 1).
        limit: maximum number of matches to return; None or 0 for no limit.

    Returns:
        A tuple (indexes, values) where indexes holds the dataset row
        index of every match and values the corresponding rows converted
        with bytesArrayToList.  Returns None for a null-space dataset.

    Raises:
        IOError(ENXIO): no dataset with the given uuid.
        IOError(EINVAL): dataset is not compound or not one-dimensional,
            step is not positive, or the query is rejected by _getEvalStr.
    """
    self.log.info("doQueryByUuid - uuid: " + obj_uuid + " query:" + query)
    self.log.info("start: " + str(start) + " stop: " + str(stop) + " step: " + str(step) + " limit: " + str(limit))
    dset = self.getDatasetObjByUuid(obj_uuid)
    if dset is None:
        msg = "Dataset: " + obj_uuid + " not found"
        self.log.info(msg)
        raise IOError(errno.ENXIO, msg)

    typeItem = getTypeItem(dset.dtype)
    if typeItem['class'] != "H5T_COMPOUND":
        msg = "Only compound type datasets can be used as query target"
        self.log.info(msg)
        raise IOError(errno.EINVAL, msg)

    if dset.shape is None:
        # null space dataset (with h5py 2.6.0)
        return None

    if len(dset.shape) != 1:
        msg = "Only one-dimensional datasets can be used as query target"
        self.log.info(msg)
        raise IOError(errno.EINVAL, msg)

    if step is None:
        step = 1
    if step < 1:
        msg = "step must be a positive integer"
        self.log.info(msg)
        raise IOError(errno.EINVAL, msg)

    num_elements = dset.shape[0]
    if stop == -1 or stop > num_elements:
        stop = num_elements
    block_size = self._getBlockSize(dset)
    self.log.info("block_size: " + str(block_size))

    field_names = list(dset.dtype.fields.keys())
    eval_str = self._getEvalStr(query, field_names)

    # SECURITY NOTE: eval_str is derived from a caller-supplied query.
    # _getEvalStr rewrites identifiers into rows['<field>'] lookups, and the
    # expression is evaluated with no builtins and only `rows` in scope.
    # This is defence in depth, not a sandbox -- do not relax _getEvalStr
    # without reviewing what could then reach eval().
    eval_globals = {"__builtins__": {}}

    values = []
    indexes = []
    count = 0

    while start < stop:
        if limit and (count == limit):
            break  # no more rows for this batch
        end = min(start + block_size, stop)
        rows = dset[start:end:step]  # read from dataset
        where_result = np.where(eval(eval_str, eval_globals, {"rows": rows}))
        for i in where_result[0].tolist():
            values.append(self.bytesArrayToList(rows[i]))
            # rows[i] is the i-th strided row of this block
            indexes.append(start + i * step)
            count += 1
            if limit and (count == limit):
                break  # no more rows for this batch

        # Advance to the first stride position at or after `end` so the
        # next block stays aligned with `step` (equals `end` when step == 1).
        start += ((end - start + step - 1) // step) * step

    self.log.info("got " + str(count) + " query matches")
    return (indexes, values)
2263
+
2264
+ """
2265
+ _getBlockSize: Get number of rows to read from disk
2266
+
2267
+ Heuristic to get a reasonably sized chunk of data to fetch.
2268
+ make multiple of chunk_size if possible
2269
+ """
2270
def _getBlockSize(self, dset):
    """Return the number of rows to read from the dataset per block.

    The target is 256,000 rows.  For a chunked dataset whose first chunk
    dimension is smaller than the target, the result is the target rounded
    down to a multiple of that chunk dimension; otherwise the target itself
    is returned.
    """
    default_rows = 256 * 1000

    chunk_shape = dset.chunks
    if not chunk_shape:
        # dataset is not chunked: nothing to align to
        return default_rows

    rows_per_chunk = chunk_shape[0]
    if rows_per_chunk >= default_rows:
        return default_rows

    # largest multiple of rows_per_chunk that does not exceed the target
    return default_rows - (default_rows % rows_per_chunk)
2281
+
2282
+ """
2283
+ _getEvalStr: Get eval string for given query
2284
+
2285
+ Gets Eval string to use with numpy where method.
2286
+ """
2287
def _getEvalStr(self, query, field_names):
    """Translate a query string into an expression for numpy's where().

    Every identifier found outside a quoted literal must be one of
    field_names and is rewritten as rows['<name>'].  Quoted literals
    (optionally with a b prefix), parentheses and all other characters
    are copied through unchanged.

    Identifiers start with a letter and may continue with letters, digits
    or underscores.  Backslash-escaped quotes inside literals are not
    recognised.

    Parameters:
        query: the query expression, e.g. "(price > 3) & (sym == b'AA')".
        field_names: the field names that may appear in the query.

    Returns:
        The rewritten expression string.

    Raises:
        IOError(EINVAL): a black-listed field name is present, the query
            references an unknown field, has an unterminated quote or
            unbalanced parentheses, or references no field at all.
    """
    quote_chars = ("'", '"')
    black_list = ("import",)  # field names that are not allowed

    def fail(msg):
        self.log.info("EINVAL: " + msg)
        raise IOError(errno.EINVAL, msg)

    self.log.info("getEvalStr(" + query + ")")
    for item in black_list:
        if item in field_names:
            fail("invalid field name")

    parts = []             # pieces of the output expression, joined at the end
    var_name = None        # identifier currently being collected, if any
    end_quote_char = None  # closing quote expected while inside a literal
    var_count = 0
    paren_count = 0

    for i, ch in enumerate(query):
        ch_next = query[i + 1] if (i + 1) < len(query) else None

        if var_name is not None:
            if ch.isalnum() or ch == "_":
                # still inside the identifier (digits and underscores too)
                var_name += ch
                continue
            # end of variable
            if var_name not in field_names:
                fail("unknown field name")
            parts.append("rows['" + var_name + "']")
            var_name = None
            var_count += 1

        if end_quote_char:
            if ch == end_quote_char:
                end_quote_char = None  # end of literal
            parts.append(ch)
        elif ch in quote_chars:
            end_quote_char = ch
            parts.append(ch)
        elif ch.isalpha():
            if ch == "b" and ch_next in quote_chars:
                parts.append("b")  # start of a byte string literal
            else:
                var_name = ch  # start of a variable
        elif ch == "(":
            paren_count += 1
            parts.append(ch)
        elif ch == ")":
            paren_count -= 1
            if paren_count < 0:
                fail("Mismatched paren")
            parts.append(ch)
        else:
            # just add to the output
            parts.append(ch)

    if var_name is not None:
        # the query ended while still reading an identifier; emit it too
        if var_name not in field_names:
            fail("unknown field name")
        parts.append("rows['" + var_name + "']")
        var_count += 1

    if end_quote_char:
        fail("no matching quote character")
    if var_count == 0:
        fail("No field value")
    if paren_count != 0:
        fail("Mismatched paren")

    return "".join(parts)
2186
2361
2187
2362
"""
2188
2363
Get values from dataset identified by obj_uuid using the given
0 commit comments