11
11
Sequence ,
12
12
)
13
13
from functools import partial
14
- from io import BytesIO
14
+ from io import IOBase
15
15
from itertools import starmap
16
16
from numbers import Number
17
17
from typing import (
31
31
from xarray .backends .common import (
32
32
AbstractDataStore ,
33
33
ArrayWriter ,
34
+ BytesIOProxy ,
35
+ T_PathFileOrDataStore ,
34
36
_find_absolute_paths ,
35
37
_normalize_path ,
36
38
)
@@ -503,7 +505,7 @@ def _datatree_from_backend_datatree(
503
505
504
506
505
507
def open_dataset (
506
- filename_or_obj : str | os . PathLike [ Any ] | ReadBuffer | AbstractDataStore ,
508
+ filename_or_obj : T_PathFileOrDataStore ,
507
509
* ,
508
510
engine : T_Engine = None ,
509
511
chunks : T_Chunks = None ,
@@ -533,12 +535,13 @@ def open_dataset(
533
535
534
536
Parameters
535
537
----------
536
- filename_or_obj : str, Path, file-like or DataStore
538
+ filename_or_obj : str, Path, file-like, bytes, memoryview or DataStore
537
539
Strings and Path objects are interpreted as a path to a netCDF file
538
540
or an OpenDAP URL and opened with python-netCDF4, unless the filename
539
541
ends with .gz, in which case the file is gunzipped and opened with
540
- scipy.io.netcdf (only netCDF3 supported). Byte-strings or file-like
541
- objects are opened by scipy.io.netcdf (netCDF3) or h5py (netCDF4/HDF).
542
+ scipy.io.netcdf (only netCDF3 supported). Bytes, memoryview and
543
+ file-like objects are opened by scipy.io.netcdf (netCDF3) or h5netcdf
544
+ (netCDF4).
542
545
engine : {"netcdf4", "scipy", "pydap", "h5netcdf", "zarr", None}\
543
546
, installed backend \
544
547
or subclass of xarray.backends.BackendEntrypoint, optional
@@ -743,7 +746,7 @@ def open_dataset(
743
746
744
747
745
748
def open_dataarray (
746
- filename_or_obj : str | os . PathLike [ Any ] | ReadBuffer | AbstractDataStore ,
749
+ filename_or_obj : T_PathFileOrDataStore ,
747
750
* ,
748
751
engine : T_Engine = None ,
749
752
chunks : T_Chunks = None ,
@@ -774,12 +777,13 @@ def open_dataarray(
774
777
775
778
Parameters
776
779
----------
777
- filename_or_obj : str, Path, file-like or DataStore
780
+ filename_or_obj : str, Path, file-like, bytes, memoryview or DataStore
778
781
Strings and Path objects are interpreted as a path to a netCDF file
779
782
or an OpenDAP URL and opened with python-netCDF4, unless the filename
780
783
ends with .gz, in which case the file is gunzipped and opened with
781
- scipy.io.netcdf (only netCDF3 supported). Byte-strings or file-like
782
- objects are opened by scipy.io.netcdf (netCDF3) or h5py (netCDF4/HDF).
784
+ scipy.io.netcdf (only netCDF3 supported). Bytes, memoryview and
785
+ file-like objects are opened by scipy.io.netcdf (netCDF3) or h5netcdf
786
+ (netCDF4).
783
787
engine : {"netcdf4", "scipy", "pydap", "h5netcdf", "zarr", None}\
784
788
, installed backend \
785
789
or subclass of xarray.backends.BackendEntrypoint, optional
@@ -970,7 +974,7 @@ def open_dataarray(
970
974
971
975
972
976
def open_datatree (
973
- filename_or_obj : str | os . PathLike [ Any ] | ReadBuffer | AbstractDataStore ,
977
+ filename_or_obj : T_PathFileOrDataStore ,
974
978
* ,
975
979
engine : T_Engine = None ,
976
980
chunks : T_Chunks = None ,
@@ -1001,8 +1005,10 @@ def open_datatree(
1001
1005
1002
1006
Parameters
1003
1007
----------
1004
- filename_or_obj : str, Path, file-like, or DataStore
1005
- Strings and Path objects are interpreted as a path to a netCDF file or Zarr store.
1008
+ filename_or_obj : str, Path, file-like, bytes, memoryview or DataStore
1009
+ Strings and Path objects are interpreted as a path to a netCDF file or
1010
+ Zarr store. Bytes and memoryview objects are interpreted as file
1011
+ contents.
1006
1012
engine : {"netcdf4", "h5netcdf", "zarr", None}, \
1007
1013
installed backend or xarray.backends.BackendEntrypoint, optional
1008
1014
Engine to use when reading files. If not provided, the default engine
@@ -1208,7 +1214,7 @@ def open_datatree(
1208
1214
1209
1215
1210
1216
def open_groups (
1211
- filename_or_obj : str | os . PathLike [ Any ] | ReadBuffer | AbstractDataStore ,
1217
+ filename_or_obj : T_PathFileOrDataStore ,
1212
1218
* ,
1213
1219
engine : T_Engine = None ,
1214
1220
chunks : T_Chunks = None ,
@@ -1243,8 +1249,10 @@ def open_groups(
1243
1249
1244
1250
Parameters
1245
1251
----------
1246
- filename_or_obj : str, Path, file-like, or DataStore
1247
- Strings and Path objects are interpreted as a path to a netCDF file or Zarr store.
1252
+ filename_or_obj : str, Path, file-like, bytes, memoryview or DataStore
1253
+ Strings and Path objects are interpreted as a path to a netCDF file or
1254
+ Zarr store. Bytes and memoryview objects are interpreted as file
1255
+ contents.
1248
1256
engine : {"netcdf4", "h5netcdf", "zarr", None}, \
1249
1257
installed backend or xarray.backends.BackendEntrypoint, optional
1250
1258
Engine to use when reading files. If not provided, the default engine
@@ -1780,7 +1788,7 @@ def to_netcdf(
1780
1788
) -> tuple [ArrayWriter , AbstractDataStore ]: ...
1781
1789
1782
1790
1783
- # path=None writes to bytes
1791
+ # path=None writes to bytes or memoryview, depending on store
1784
1792
@overload
1785
1793
def to_netcdf (
1786
1794
dataset : Dataset ,
@@ -1795,7 +1803,7 @@ def to_netcdf(
1795
1803
multifile : Literal [False ] = False ,
1796
1804
invalid_netcdf : bool = False ,
1797
1805
auto_complex : bool | None = None ,
1798
- ) -> bytes : ...
1806
+ ) -> bytes | memoryview : ...
1799
1807
1800
1808
1801
1809
# compute=False returns dask.Delayed
@@ -1821,7 +1829,7 @@ def to_netcdf(
1821
1829
@overload
1822
1830
def to_netcdf (
1823
1831
dataset : Dataset ,
1824
- path_or_file : str | os .PathLike ,
1832
+ path_or_file : str | os .PathLike | IOBase ,
1825
1833
mode : NetcdfWriteModes = "w" ,
1826
1834
format : T_NetcdfTypes | None = None ,
1827
1835
group : str | None = None ,
@@ -1877,7 +1885,7 @@ def to_netcdf(
1877
1885
@overload
1878
1886
def to_netcdf (
1879
1887
dataset : Dataset ,
1880
- path_or_file : str | os .PathLike | None ,
1888
+ path_or_file : str | os .PathLike | IOBase | None ,
1881
1889
mode : NetcdfWriteModes = "w" ,
1882
1890
format : T_NetcdfTypes | None = None ,
1883
1891
group : str | None = None ,
@@ -1888,12 +1896,12 @@ def to_netcdf(
1888
1896
multifile : bool = False ,
1889
1897
invalid_netcdf : bool = False ,
1890
1898
auto_complex : bool | None = None ,
1891
- ) -> tuple [ArrayWriter , AbstractDataStore ] | bytes | Delayed | None : ...
1899
+ ) -> tuple [ArrayWriter , AbstractDataStore ] | bytes | memoryview | Delayed | None : ...
1892
1900
1893
1901
1894
1902
def to_netcdf (
1895
1903
dataset : Dataset ,
1896
- path_or_file : str | os .PathLike | None = None ,
1904
+ path_or_file : str | os .PathLike | IOBase | None = None ,
1897
1905
mode : NetcdfWriteModes = "w" ,
1898
1906
format : T_NetcdfTypes | None = None ,
1899
1907
group : str | None = None ,
@@ -1904,7 +1912,7 @@ def to_netcdf(
1904
1912
multifile : bool = False ,
1905
1913
invalid_netcdf : bool = False ,
1906
1914
auto_complex : bool | None = None ,
1907
- ) -> tuple [ArrayWriter , AbstractDataStore ] | bytes | Delayed | None :
1915
+ ) -> tuple [ArrayWriter , AbstractDataStore ] | bytes | memoryview | Delayed | None :
1908
1916
"""This function creates an appropriate datastore for writing a dataset to
1909
1917
disk as a netCDF file
1910
1918
@@ -1918,26 +1926,27 @@ def to_netcdf(
1918
1926
if encoding is None :
1919
1927
encoding = {}
1920
1928
1921
- if path_or_file is None :
1929
+ if isinstance (path_or_file , str ):
1930
+ if engine is None :
1931
+ engine = _get_default_engine (path_or_file )
1932
+ path_or_file = _normalize_path (path_or_file )
1933
+ else :
1934
+ # writing to bytes/memoryview or a file-like object
1922
1935
if engine is None :
1936
+ # TODO: only use 'scipy' if format is None or a netCDF3 format
1923
1937
engine = "scipy"
1924
- elif engine != "scipy" :
1938
+ elif engine not in ( "scipy" , "h5netcdf" ) :
1925
1939
raise ValueError (
1926
- "invalid engine for creating bytes with "
1927
- f"to_netcdf: { engine !r} . Only the default engine "
1928
- "or engine='scipy' is supported"
1940
+ "invalid engine for creating bytes/memoryview or writing to a "
1941
+ f"file-like object with to_netcdf: { engine !r} . Only "
1942
+ "engine=None, engine='scipy' and engine='h5netcdf' is "
1943
+ "supported."
1929
1944
)
1930
1945
if not compute :
1931
1946
raise NotImplementedError (
1932
1947
"to_netcdf() with compute=False is not yet implemented when "
1933
1948
"returning bytes"
1934
1949
)
1935
- elif isinstance (path_or_file , str ):
1936
- if engine is None :
1937
- engine = _get_default_engine (path_or_file )
1938
- path_or_file = _normalize_path (path_or_file )
1939
- else : # file-like object
1940
- engine = "scipy"
1941
1950
1942
1951
# validate Dataset keys, DataArray names, and attr keys/values
1943
1952
_validate_dataset_names (dataset )
@@ -1962,7 +1971,11 @@ def to_netcdf(
1962
1971
f"is not currently supported with dask's { scheduler } scheduler"
1963
1972
)
1964
1973
1965
- target = path_or_file if path_or_file is not None else BytesIO ()
1974
+ if path_or_file is None :
1975
+ target = BytesIOProxy ()
1976
+ else :
1977
+ target = path_or_file # type: ignore[assignment]
1978
+
1966
1979
kwargs = dict (autoclose = True ) if autoclose else {}
1967
1980
if invalid_netcdf :
1968
1981
if engine == "h5netcdf" :
@@ -2002,17 +2015,19 @@ def to_netcdf(
2002
2015
2003
2016
writes = writer .sync (compute = compute )
2004
2017
2005
- if isinstance (target , BytesIO ):
2006
- store .sync ()
2007
- return target .getvalue ()
2008
2018
finally :
2009
2019
if not multifile and compute : # type: ignore[redundant-expr]
2010
2020
store .close ()
2011
2021
2022
+ if path_or_file is None :
2023
+ assert isinstance (target , BytesIOProxy ) # created in this function
2024
+ return target .getvalue_or_getbuffer ()
2025
+
2012
2026
if not compute :
2013
2027
import dask
2014
2028
2015
2029
return dask .delayed (_finalize_store )(writes , store )
2030
+
2016
2031
return None
2017
2032
2018
2033
0 commit comments