@@ -6692,16 +6692,197 @@ The content of the new file is:
6692
6692
4, 0, 5 ;
6693
6693
}
6694
6694
6695
- ----
6696
-
6697
6695
.. _Coordinate-subampling:
6698
6696
6699
6697
Coordinate subsampling
6700
6698
^^^^^^^^^^^^^^^^^^^^^^
6701
6699
6702
6700
`Lossy compression by coordinate subsampling`_ was introduced into the
6703
- CF conventions at CF-1.9, but is not yet available in cfdm. It will be
6704
- ready in a future 3.x.0 release.
6701
+ CF conventions at CF-1.9 for applications for which the coordinates
6702
+ can require considerably more storage than the data itself. Space may
6703
+ be saved in the netCDF file by storing a subsample of the coordinates
6704
+ that describe the data, and the uncompressed coordinate and auxiliary
6705
+ coordinate variables are reconstituted by interpolation, from the
6706
+ subsampled coordinate values to the domain of the data.
6707
+
6708
+ This is illustrated with the file ``subsampled.nc`` (found in the
6709
+ :ref:`sample datasets <Sample-datasets>`):
6710
+
6711
+
6712
+ .. code-block:: console
6713
+ :caption: *Inspect the compressed dataset with the ncdump command
6714
+ line tool.*
6715
+
6716
+ $ ncdump -h subsampled.nc
6717
+ netcdf subsampled {
6718
+ dimensions:
6719
+ time = 2 ;
6720
+ lat = 18 ;
6721
+ lon = 12 ;
6722
+ tp_lat = 4 ;
6723
+ tp_lon = 5 ;
6724
+ variables:
6725
+ float time(time) ;
6726
+ time:standard_name = "time" ;
6727
+ time:units = "days since 2000-01-01" ;
6728
+ float lat(tp_lat, tp_lon) ;
6729
+ lat:standard_name = "latitude" ;
6730
+ lat:units = "degrees_north" ;
6731
+ lat:bounds_tie_points = "lat_bounds" ;
6732
+ float lon(tp_lat, tp_lon) ;
6733
+ lon:standard_name = "longitude" ;
6734
+ lon:units = "degrees_east" ;
6735
+ lon:bounds_tie_points = "lon_bounds" ;
6736
+ float lat_bounds(tp_lat, tp_lon) ;
6737
+ float lon_bounds(tp_lat, tp_lon) ;
6738
+ int lat_indices(tp_lat) ;
6739
+ lat_indices:long_name = "Tie point indices for latitude dimension" ;
6740
+ int lon_indices(tp_lon) ;
6741
+ lon_indices:long_name = "Tie point indices for longitude dimension" ;
6742
+ int bilinear ;
6743
+ bilinear:interpolation_name = "bi_linear" ;
6744
+ bilinear:computational_precision = "64" ;
6745
+ bilinear:tie_point_mapping =
6746
+ "lat: lat_indices tp_lat lon: lon_indices tp_lon" ;
6747
+ float q(time, lat, lon) ;
6748
+ q:standard_name = "specific_humidity" ;
6749
+ q:units = "1" ;
6750
+ q:coordinate_interpolation = "lat: lon: bilinear" ;
6751
+
6752
+ // global attributes:
6753
+ :Conventions = "CF-1.11" ;
6754
+ }
6755
+
6756
+
6757
+ Reading and inspecting this file shows the latitude and longitude
6758
+ coordinates in uncompressed form, whilst their underlying arrays are
6759
+ still in the subsampled representation described in the file:
6760
+
6761
+ .. code-block:: python
6762
+ :caption: *Read a field construct from a dataset that has been
6763
+ compressed by coordinate subsampling, and inspect
6764
+ coordinates.*
6765
+
6766
+ >>> f = cf.read('subsampled.nc')[0]
6767
+ >>> print(f)
6768
+ Field: specific_humidity (ncvar%q)
6769
+ ----------------------------------
6770
+ Data : specific_humidity(time(2), ncdim%lat(18), ncdim%lon(12)) 1
6771
+ Dimension coords: time(2) = [2000-01-01 00:00:00, 2000-02-01 00:00:00]
6772
+ Auxiliary coords: latitude(ncdim%lat(18), ncdim%lon(12)) = [[-85.0, ..., 85.0]] degrees_north
6773
+ : longitude(ncdim%lat(18), ncdim%lon(12)) = [[15.0, ..., 345.0]] degrees_east
6774
+ >>> lon = f.construct('longitude')
6775
+ >>> lon
6776
+ <AuxiliaryCoordinate: longitude(18, 12) degrees_east>
6777
+ >>> lon.data.source()
6778
+ <SubsampledArray(18, 12): >
6779
+ >>> print(lon.array)
6780
+ [[15.0 45.0 75.0 105.0 135.0 165.0 195.0 225.0 255.0 285.0 315.0 345.0]
6781
+ [15.0 45.0 75.0 105.0 135.0 165.0 195.0 225.0 255.0 285.0 315.0 345.0]
6782
+ [15.0 45.0 75.0 105.0 135.0 165.0 195.0 225.0 255.0 285.0 315.0 345.0]
6783
+ [15.0 45.0 75.0 105.0 135.0 165.0 195.0 225.0 255.0 285.0 315.0 345.0]
6784
+ [15.0 45.0 75.0 105.0 135.0 165.0 195.0 225.0 255.0 285.0 315.0 345.0]
6785
+ [15.0 45.0 75.0 105.0 135.0 165.0 195.0 225.0 255.0 285.0 315.0 345.0]
6786
+ [15.0 45.0 75.0 105.0 135.0 165.0 195.0 225.0 255.0 285.0 315.0 345.0]
6787
+ [15.0 45.0 75.0 105.0 135.0 165.0 195.0 225.0 255.0 285.0 315.0 345.0]
6788
+ [15.0 45.0 75.0 105.0 135.0 165.0 195.0 225.0 255.0 285.0 315.0 345.0]
6789
+ [15.0 45.0 75.0 105.0 135.0 165.0 195.0 225.0 255.0 285.0 315.0 345.0]
6790
+ [15.0 45.0 75.0 105.0 135.0 165.0 195.0 225.0 255.0 285.0 315.0 345.0]
6791
+ [15.0 45.0 75.0 105.0 135.0 165.0 195.0 225.0 255.0 285.0 315.0 345.0]
6792
+ [15.0 45.0 75.0 105.0 135.0 165.0 195.0 225.0 255.0 285.0 315.0 345.0]
6793
+ [15.0 45.0 75.0 105.0 135.0 165.0 195.0 225.0 255.0 285.0 315.0 345.0]
6794
+ [15.0 45.0 75.0 105.0 135.0 165.0 195.0 225.0 255.0 285.0 315.0 345.0]
6795
+ [15.0 45.0 75.0 105.0 135.0 165.0 195.0 225.0 255.0 285.0 315.0 345.0]
6796
+ [15.0 45.0 75.0 105.0 135.0 165.0 195.0 225.0 255.0 285.0 315.0 345.0]
6797
+ [15.0 45.0 75.0 105.0 135.0 165.0 195.0 225.0 255.0 285.0 315.0 345.0]]
6798
+ >>> lon.data.source().source()
6799
+ <Data(4, 5): [[15.0, ..., 345.0]]>
6800
+ >>> print(lon.data.source().source().array)
6801
+ [[ 15. 135. 225. 255. 345.]
6802
+ [ 15. 135. 225. 255. 345.]
6803
+ [ 15. 135. 225. 255. 345.]
6804
+ [ 15. 135. 225. 255. 345.]]
6805
+
6806
+ As with all other forms of compression, the field may be treated as if
6807
+ it were not compressed:
6808
+
6809
+ .. code-block:: python
6810
+ :caption: *Get subspaces based on indices of the uncompressed
6811
+ data.*
6812
+
6813
+ >>> g = f[0, 6, :]
6814
+ >>> print(g)
6815
+ Field: specific_humidity (ncvar%q)
6816
+ ----------------------------------
6817
+ Data : specific_humidity(time(1), ncdim%lat(1), ncdim%lon(12)) 1
6818
+ Dimension coords: time(1) = [2000-01-01 00:00:00]
6819
+ Auxiliary coords: latitude(ncdim%lat(1), ncdim%lon(12)) = [[-25.0, ..., -25.0]] degrees_north
6820
+ : longitude(ncdim%lat(1), ncdim%lon(12)) = [[15.0, ..., 345.0]] degrees_east
6821
+ >>> print(g.construct('longitude').array)
6822
+ [[15.0 45.0 75.0 105.0 135.0 165.0 195.0 225.0 255.0 285.0 315.0 345.0]]
6823
+
6824
+
6825
+ The metadata that define the subsampling are contained within the
6826
+ coordinate's `Data` object:
6827
+
6828
+ .. code-block:: python
6829
+ :caption: *Inspect the subsampling metadata of the compressed
6830
+ data.*
6831
+
6832
+ >>> lon = f.construct('longitude')
6833
+ >>> d = lon.data.source()
6834
+ >>> d.get_tie_point_indices()
6835
+ {0: <TiePointIndex: long_name=Tie point indices for latitude dimension(4) >,
6836
+ 1: <TiePointIndex: long_name=Tie point indices for longitude dimension(5) >}
6837
+ >>> d.get_computational_precision()
6838
+ '64'
6839
+
6840
+ It is not yet, as of version 1.10.0.0, possible to write to disk a
6841
+ field construct with compression by coordinate subsampling.
6842
+
6843
+ .. _Lossy-compression-via-quantization:
6844
+
6845
+ Lossy compression via quantization
6846
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
6847
+
6848
+ `Lossy compression via quantization`_ eliminates false precision,
6849
+ usually by rounding the least significant bits of floating-point
6850
+ mantissas to zeros, so that a subsequent compression on disk is more
6851
+ efficient. Quantization is described by the following parameters:
6852
+
6853
+ * The ``algorithm`` parameter names a specific quantization algorithm.
6854
+
6855
+ * The ``implementation`` parameter contains unstandardised text that
6856
+ concisely conveys the algorithm provenance including the name of the
6857
+ library or client that performed the quantization, the software
6858
+ version, and any other information required to disambiguate the
6859
+ source of the algorithm employed. The text must take the form
6860
+ ``software-name version version-string [(optional-information)]``.
6861
+
6862
+ * The retained precision of the algorithm is defined with either the
6863
+ ``quantization_nsb`` or ``quantization_nsd`` parameter.
6864
+
6865
+ If quantization has been applied to the data, then it may be described
6866
+ in a `Quantization` object, accessed via the construct's
6867
+ `!get_quantization` method. To apply quantization at the time of
6868
+ writing the data to disk, use the construct's `!set_quantize_on_write`
6869
+ method:
6870
+
6871
+ .. code-block:: python
6872
+ :caption: *Lossy compression via quantization.*
6873
+
6874
+ >>> q, t = cf.read('file.nc')
6875
+ >>> t.set_quantize_on_write(algorithm='bitgroom', quantization_nsd=6)
6876
+ >>> cf.write(t, 'quantized.nc')
6877
+ >>> quantized = cf.read('quantized.nc')[0]
6878
+ >>> c = quantized.get_quantization()
6879
+ >>> c
6880
+ <CF Quantization: _QuantizeBitGroomNumberOfSignificantDigits=6, algorithm=bitgroom, implementation=libnetcdf version 4.9.4-development, quantization_nsd=6>
6881
+ >>> c.parameters()
6882
+ {'algorithm': 'bitgroom',
6883
+ 'implementation': 'libnetcdf version 4.9.4-development',
6884
+ '_QuantizeBitGroomNumberOfSignificantDigits': np.int32(6),
6885
+ 'quantization_nsd': np.int64(6)}
6705
6886
6706
6887
----
6707
6888
0 commit comments