@@ -207,22 +207,6 @@ def _create_data(
207
207
if data .npartitions == 1 :
208
208
data ._cfa_set_write (True )
209
209
210
- # if (
211
- # not compression_index
212
- # and self.read_vars.get("cache")
213
- # and self.implementation.get_construct_type(construct)
214
- # != "field"
215
- # ):
216
- # # Only cache values from non-field data and
217
- # # non-compression-index data, on the assumptions that:
218
- # #
219
- # # a) Field data is, in general, so large that finding
220
- # # the cached values takes too long.
221
- # #
222
- # # b) Cached values are never really required for
223
- # # compression index data.
224
- # self._cache_data_elements(data, ncvar)
225
-
226
210
return data
227
211
228
212
# ------------------------------------------------------------
@@ -312,99 +296,6 @@ def _is_cfa_variable(self, ncvar):
312
296
and ncvar not in g ["external_variables" ]
313
297
)
314
298
315
- # def _create_Data(
316
- # self,
317
- # array,
318
- # ncvar,
319
- # units=None,
320
- # calendar=None,
321
- # ncdimensions=(),
322
- # **kwargs,
323
- # ):
324
- # """Create a Data object from a netCDF variable.
325
- #
326
- # .. versionadded:: 3.0.0
327
- #
328
- # :Parameters:
329
- #
330
- # array: `Array`
331
- # The file array.
332
- #
333
- # ncvar: `str`
334
- # The netCDF variable containing the array.
335
- #
336
- # units: `str`, optional
337
- # The units of *array*. By default, or if `None`, it is
338
- # assumed that there are no units.
339
- #
340
- # calendar: `str`, optional
341
- # The calendar of *array*. By default, or if `None`, it is
342
- # assumed that there is no calendar.
343
- #
344
- # ncdimensions: sequence of `str`, optional
345
- # The netCDF dimensions spanned by the array.
346
- #
347
- # .. versionadded:: 3.14.0
348
- #
349
- # kwargs: optional
350
- # Extra parameters to pass to the initialisation of the
351
- # returned `Data` object.
352
- #
353
- # :Returns:
354
- #
355
- # `Data`
356
- #
357
- # """
358
- # if array.dtype is None:
359
- # # The array is based on a netCDF VLEN variable, and
360
- # # therefore has unknown data type. To find the correct
361
- # # data type (e.g. "<U7"), we need to read the entire array
362
- # # from its netCDF variable into memory to find the longest
363
- # # string.
364
- # g = self.read_vars
365
- # if g["has_groups"]:
366
- # group, name = self._netCDF4_group(
367
- # g["variable_grouped_dataset"][ncvar], ncvar
368
- # )
369
- # variable = group.variables.get(name)
370
- # else:
371
- # variable = g["variables"].get(ncvar)
372
- #
373
- # array = variable[...]
374
- #
375
- # string_type = isinstance(array, str)
376
- # if string_type:
377
- # # A netCDF string type scalar variable comes out as Python
378
- # # str object, so convert it to a numpy array.
379
- # array = np.array(array, dtype=f"U{len(array)}")
380
- #
381
- # if not variable.ndim:
382
- # # NetCDF4 has a thing for making scalar size 1
383
- # # variables into 1d arrays
384
- # array = array.squeeze()
385
- #
386
- # if not string_type:
387
- # # A N-d (N>=1) netCDF string type variable comes out
388
- # # as a numpy object array, so convert it to numpy
389
- # # string array.
390
- # array = array.astype("U", copy=False)
391
- # # NetCDF4 doesn't auto-mask VLEN variables
392
- # array = np.ma.where(array == "", np.ma.masked, array)
393
- #
394
- # # Parse dask chunks
395
- # chunks = self._dask_chunks(array, ncvar, compressed)
396
- #
397
- # data = super()._create_Data(
398
- # array,
399
- # ncvar,
400
- # units=units,
401
- # calendar=calendar,
402
- # chunks=chunks,
403
- # **kwargs,
404
- # )
405
- #
406
- # return data
407
-
408
299
def _customise_read_vars (self ):
409
300
"""Customise the read parameters.
410
301
@@ -465,160 +356,6 @@ def _customise_read_vars(self):
465
356
for term_ncvar in parsed_aggregated_data .values ():
466
357
g ["do_not_create_field" ].add (term_ncvar )
467
358
468
- # def _cache_data_elements(self, data, ncvar):
469
- # """Cache selected element values.
470
- #
471
- # Updates *data* in-place to store its first, second,
472
- # penultimate, and last element values (as appropriate).
473
- #
474
- # These values are used by, amongst other things,
475
- # `cf.Data.equals`, `cf.aggregate` and for inspection.
476
- #
477
- # Doing this here is quite cheap because only the individual
478
- # elements are read from the already-open file, as opposed to
479
- # being retrieved from *data* (which would require a whole dask
480
- # chunk to be read to get each single value).
481
- #
482
- # However, empirical evidence shows that using netCDF4 to access
483
- # the first and last elements of a large array on disk
484
- # (e.g. shape (1, 75, 1207, 1442)) is slow (e.g. ~2 seconds) and
485
- # doesn't scale well with array size (i.e. it takes
486
- # disproportionally longer for larger arrays). Such arrays are
487
- # usually in field constructs, for which `cf.aggregate` does not
488
- # need to know any array values, so this method should be used
489
- # with caution, if at all, on field construct data.
490
- #
491
- # .. versionadded:: 3.14.0
492
- #
493
- # :Parameters:
494
- #
495
- # data: `Data`
496
- # The data to be updated with its cached values.
497
- #
498
- # ncvar: `str`
499
- # The name of the netCDF variable that contains the
500
- # data.
501
- #
502
- # :Returns:
503
- #
504
- # `None`
505
- #
506
- # """
507
- #
508
- # if data.data.get_compression_type():
509
- # # Don't get cached elements from arrays compressed by
510
- # # convention, as they'll likely be wrong.
511
- # return
512
- #
513
- # g = self.read_vars
514
- #
515
- # # Get the netCDF4.Variable for the data
516
- # if g["has_groups"]:
517
- # group, name = self._netCDF4_group(
518
- # g["variable_grouped_dataset"][ncvar], ncvar
519
- # )
520
- # variable = group.variables.get(name)
521
- # else:
522
- # variable = g["variables"].get(ncvar)
523
- #
524
- # # Get the required element values
525
- # size = data.size
526
- # ndim = data.ndim
527
- #
528
- # char = False
529
- # if variable.ndim == ndim + 1:
530
- # dtype = variable.dtype
531
- # if dtype is not str and dtype.kind in "SU":
532
- # # This variable is a netCDF classic style char array
533
- # # with a trailing dimension that needs to be collapsed
534
- # char = True
535
- #
536
- # if ndim == 1:
537
- # # Also cache the second element for 1-d data, on the
538
- # # assumption that they may well be dimension coordinate
539
- # # data.
540
- # if size == 1:
541
- # indices = (0, -1)
542
- # value = variable[...]
543
- # values = (value, value)
544
- # elif size == 2:
545
- # indices = (0, 1, -1)
546
- # value = variable[-1:]
547
- # values = (variable[:1], value, value)
548
- # else:
549
- # indices = (0, 1, -1)
550
- # values = (variable[:1], variable[1:2], variable[-1:])
551
- # elif ndim == 2 and data.shape[-1] == 2:
552
- # # Assume that 2-d data with a last dimension of size 2
553
- # # contains coordinate bounds, for which it is useful to
554
- # # cache the upper and lower bounds of the first and
555
- # # last cells.
556
- # indices = (0, 1, -2, -1)
557
- # ndim1 = ndim - 1
558
- # values = (
559
- # variable[(slice(0, 1),) * ndim1 + (slice(0, 1),)],
560
- # variable[(slice(0, 1),) * ndim1 + (slice(1, 2),)],
561
- # )
562
- # if data.size == 2:
563
- # values = values + values
564
- # else:
565
- # values += (
566
- # variable[(slice(-1, None, 1),) * ndim1 + (slice(0, 1),)],
567
- # variable[(slice(-1, None, 1),) * ndim1 + (slice(1, 2),)],
568
- # )
569
- # elif size == 1:
570
- # indices = (0, -1)
571
- # value = variable[...]
572
- # values = (value, value)
573
- # elif size == 3:
574
- # indices = (0, 1, -1)
575
- # if char:
576
- # values = variable[...].reshape(3, variable.shape[-1])
577
- # else:
578
- # values = variable[...].flatten()
579
- # else:
580
- # indices = (0, -1)
581
- # values = (
582
- # variable[(slice(0, 1),) * ndim],
583
- # variable[(slice(-1, None, 1),) * ndim],
584
- # )
585
- #
586
- # # Create a dictionary of the element values
587
- # elements = {}
588
- # for index, value in zip(indices, values):
589
- # if char:
590
- # # Variable is a netCDF classic style char array, so
591
- # # collapse (by concatenation) the outermost (fastest
592
- # # varying) dimension. E.g. [['a','b','c']] becomes
593
- # # ['abc']
594
- # if value.dtype.kind == "U":
595
- # value = value.astype("S")
596
- #
597
- # a = netCDF4.chartostring(value)
598
- # shape = a.shape
599
- # a = np.array([x.rstrip() for x in a.flat])
600
- # a = np.reshape(a, shape)
601
- # value = np.ma.masked_where(a == "", a)
602
- #
603
- # if np.ma.is_masked(value):
604
- # value = np.ma.masked
605
- # else:
606
- # try:
607
- # value = value.item()
608
- # except (AttributeError, ValueError):
609
- # # AttributeError: A netCDF string type scalar
610
- # # variable comes out as Python str object, which
611
- # # has no 'item' method.
612
- # #
613
- # # ValueError: A size-0 array can't be converted to
614
- # # a Python scalar.
615
- # pass
616
- #
617
- # elements[index] = value
618
- #
619
- # # Store the elements in the data object
620
- # data._set_cached_elements(elements)
621
-
622
359
def _create_cfanetcdfarray (
623
360
self ,
624
361
ncvar ,
@@ -771,72 +508,6 @@ def _create_cfanetcdfarray_term(
771
508
772
509
return array , kwargs
773
510
774
- #
775
- # def _parse_chunks(self, ncvar):
776
- # """Parse the dask chunks.
777
- #
778
- # .. versionadded:: 3.14.0
779
- #
780
- # :Parameters:
781
- #
782
- # ncvar: `str`
783
- # The name of the netCDF variable containing the array.
784
- #
785
- # :Returns:
786
- #
787
- # `str`, `int` or `dict`
788
- # The parsed chunks that are suitable for passing to a
789
- # `Data` object containing the variable's array.
790
- #
791
- # """
792
- # g = self.read_vars
793
- #
794
- # default_chunks = "auto"
795
- # chunks = g.get("chunks", default_chunks)
796
- #
797
- # if chunks is None:
798
- # return -1
799
- #
800
- # if isinstance(chunks, dict):
801
- # if not chunks:
802
- # return default_chunks
803
- #
804
- # # For ncdimensions = ('time', 'lat'):
805
- # #
806
- # # chunks={} -> ["auto", "auto"]
807
- # # chunks={'ncdim%time': 12} -> [12, "auto"]
808
- # # chunks={'ncdim%time': 12, 'ncdim%lat': 10000} -> [12, 10000]
809
- # # chunks={'ncdim%time': 12, 'ncdim%lat': "20MB"} -> [12, "20MB"]
810
- # # chunks={'ncdim%time': 12, 'latitude': -1} -> [12, -1]
811
- # # chunks={'ncdim%time': 12, 'Y': None} -> [12, None]
812
- # # chunks={'ncdim%time': 12, 'ncdim%lat': (30, 90)} -> [12, (30, 90)]
813
- # # chunks={'ncdim%time': 12, 'ncdim%lat': None, 'X': 5} -> [12, None]
814
- # attributes = g["variable_attributes"]
815
- # chunks2 = []
816
- # for ncdim in g["variable_dimensions"][ncvar]:
817
- # key = f"ncdim%{ncdim}"
818
- # if key in chunks:
819
- # chunks2.append(chunks[key])
820
- # continue
821
- #
822
- # found_coord_attr = False
823
- # dim_coord_attrs = attributes.get(ncdim)
824
- # if dim_coord_attrs is not None:
825
- # for attr in ("standard_name", "axis"):
826
- # key = dim_coord_attrs.get(attr)
827
- # if key in chunks:
828
- # found_coord_attr = True
829
- # chunks2.append(chunks[key])
830
- # break
831
- #
832
- # if not found_coord_attr:
833
- # # Use default chunks for this dimension
834
- # chunks2.append(default_chunks)
835
- #
836
- # chunks = chunks2
837
- #
838
- # return chunks
839
-
840
511
def _customise_field_ancillaries (self , parent_ncvar , f ):
841
512
"""Create customised field ancillary constructs.
842
513
0 commit comments