1313from xarray .backends .writers import to_zarr as xr_to_zarr
1414
1515from mdio .constants import ZarrFormat
16+ from mdio .core .config import MDIOSettings
1617from mdio .core .zarr_io import zarr_warnings_suppress_unstable_structs_v3
1718
1819if TYPE_CHECKING :
2324 from xarray .core .types import T_Chunks
2425 from xarray .core .types import ZarrWriteModes
2526
def _normalize_path(path: UPath | Path | str) -> UPath:
    """Normalize a path to a UPath.

    Args:
        path: A UPath, a pathlib Path, or a string.

    Returns:
        The result of passing ``path`` to the ``UPath`` constructor.
    """
    return UPath(path)
3531
32+
3633def _normalize_storage_options (path : UPath ) -> dict [str , Any ] | None :
37- """Normalize and patch storage options for UPath paths.
34+ """Normalize storage options from UPath."""
35+ return None if len (path .storage_options ) == 0 else path .storage_options
36+
37+
def _get_gcs_store(path: UPath) -> tuple[Any, dict[str, Any] | None]:
    """Get store and storage options, using local fake GCS server if enabled.

    Args:
        path: UPath pointing to storage location.

    Returns:
        Tuple of (store, storage_options). When the ``local_gcs_server`` setting is
        enabled and the path starts with ``gs://``, store is a mapper created from a
        ``gcsfs.GCSFileSystem`` bound to ``http://localhost:4443`` and storage_options
        is None. Otherwise store is the path string and storage_options comes from
        ``_normalize_storage_options``.
    """
    settings = MDIOSettings()

    if settings.local_gcs_server and str(path).startswith("gs://"):
        import gcsfs  # noqa: PLC0415

        fs = gcsfs.GCSFileSystem(
            endpoint_url="http://localhost:4443",
            token="anon",  # noqa: S106
        )
        # Strip only the leading scheme; a blanket replace would also remove any
        # later "gs://" substring inside the path.
        store = fs.get_mapper(path.as_posix().removeprefix("gs://"))
        # Storage options are returned as None whenever a mapper is the store.
        return store, None

    return path.as_posix(), _normalize_storage_options(path)
6660
6761
6862def open_mdio (input_path : UPath | Path | str , chunks : T_Chunks = None ) -> xr_Dataset :
@@ -82,8 +76,6 @@ def open_mdio(input_path: UPath | Path | str, chunks: T_Chunks = None) -> xr_Dat
8276 Returns:
8377 An Xarray dataset opened from the input path.
8478 """
85- import zarr
86-
8779 input_path = _normalize_path (input_path )
8880 storage_options = _normalize_storage_options (input_path )
8981 zarr_format = zarr .config .get ("default_zarr_format" )
@@ -96,101 +88,48 @@ def open_mdio(input_path: UPath | Path | str, chunks: T_Chunks = None) -> xr_Dat
9688 consolidated = zarr_format == ZarrFormat .V2 , # on for v2, off for v3
9789 )
9890
def to_mdio(  # noqa: PLR0913
    dataset: Dataset,
    output_path: UPath | Path | str,
    mode: ZarrWriteModes | None = None,
    *,
    compute: bool = True,
    region: Mapping[str, slice | Literal["auto"]] | Literal["auto"] | None = None,
) -> None:
    """Write dataset contents to an MDIO output_path.

    Args:
        dataset: The dataset to write.
        output_path: The universal path of the output MDIO file.
        mode: Persistence mode: "w" means create (overwrite if exists)
            "w-" means create (fail if exists)
            "a" means override all existing variables including dimension coordinates (create if does not exist)
            "a-" means only append those variables that have ``append_dim``.
            "r+" means modify existing array *values* only (raise an error if any metadata or shapes would change).
            The default mode is "r+" if ``region`` is set and ``w-`` otherwise.
        compute: Forwarded unchanged to xarray's ``to_zarr``. This function always returns None, so
            whatever ``to_zarr`` returns (including any delayed object when ``compute`` is False) is
            not handed back to the caller.
        region: Optional mapping from dimension names to either a) ``"auto"``, or b) integer slices, indicating
            the region of existing MDIO array(s) in which to write this dataset's data.
    """
    output_path = _normalize_path(output_path)
    zarr_format = zarr.config.get("default_zarr_format")

    store, storage_options = _get_gcs_store(output_path)

    kwargs = {
        "dataset": dataset,
        "store": store,
        "mode": mode,
        "compute": compute,
        "consolidated": zarr_format == ZarrFormat.V2,  # on for v2, off for v3
        "region": region,
        "write_empty_chunks": False,
    }

    # Forward storage options only when the store is a URL/path string; they must
    # not accompany an already-built mapper store.
    if storage_options is not None and isinstance(store, str):
        kwargs["storage_options"] = storage_options

    with zarr_warnings_suppress_unstable_structs_v3():
        xr_to_zarr(**kwargs)
144-
145-
146- # def to_mdio( # noqa: PLR0913
147- # dataset: Dataset,
148- # output_path: UPath | Path | str,
149- # mode: ZarrWriteModes | None = None,
150- # *,
151- # compute: bool = True,
152- # region: Mapping[str, slice | Literal["auto"]] | Literal["auto"] | None = None,
153- # ) -> None:
154- # """Write dataset contents to an MDIO output_path."""
155- # import gcsfs, zarr
156-
157- # output_path = _normalize_path(output_path)
158-
159- # if output_path.as_posix().startswith("gs://"):
160- # fs = gcsfs.GCSFileSystem(
161- # endpoint_url="http://localhost:4443",
162- # token="anon",
163- # )
164-
165- # base_url = getattr(getattr(fs, "session", None), "_base_url", "http://localhost:4443")
166- # print(f"Using custom fake GCS filesystem with endpoint {base_url}")
167-
168- # # Build a mapper so all I/O uses the fake GCS server
169- # mapper = fs.get_mapper(output_path.as_posix().replace("gs://", ""))
170- # store = mapper
171- # storage_options = None # must be None when passing a mapper
172- # else:
173- # store = output_path.as_posix()
174- # storage_options = _normalize_storage_options(output_path) or {}
175-
176- # print(f"Writing to store: {store}")
177- # zarr_format = zarr.config.get("default_zarr_format")
178-
179- # kwargs = dict(
180- # dataset=dataset,
181- # store=store,
182- # mode=mode,
183- # compute=compute,
184- # consolidated=zarr_format == ZarrFormat.V2,
185- # region=region,
186- # write_empty_chunks=False,
187- # )
188- # if storage_options is not None:
189- # kwargs["storage_options"] = storage_options
190-
191- # with zarr_warnings_suppress_unstable_structs_v3():
192- # xr_to_zarr(**kwargs)
193-
194-
195-
196-
0 commit comments