2323 from xarray .core .types import T_Chunks
2424 from xarray .core .types import ZarrWriteModes
2525
26-
def _normalize_path(path: UPath | Path | str) -> UPath:
    """Coerce ``path`` into a ``UPath`` instance.

    No GCS-specific handling happens here. For gs:// paths the fake GCS server
    configuration is applied through the storage options built by
    ``_normalize_storage_options()``.

    Args:
        path: A ``UPath``, ``pathlib.Path`` or plain string location.

    Returns:
        The location wrapped in a ``UPath``.
    """
    from upath import UPath

    normalized = UPath(path)
    return normalized
3035
def _normalize_storage_options(path: UPath) -> dict[str, Any] | None:
    """Build the storage options to use for a normalized path.

    The options carried by ``path`` are copied into a new dict. When the string
    form of ``path`` starts with ``gs://``, a ``gcsfs.GCSFileSystem`` pointed at
    ``http://localhost:4443`` (anonymous token) is added under the ``"fs"`` key.

    Args:
        path: Normalized path whose ``storage_options`` are read.

    Returns:
        A new dict of storage options, or ``None`` when the resulting dict is empty.
    """
    # Copy so that adding "fs" below never mutates the mapping held by the path.
    storage_options = dict(path.storage_options)

    # NOTE(review): every gs:// path is redirected to a local fake-GCS endpoint.
    # This looks like test-only behavior; confirm it is not meant to ship as-is.
    if str(path).startswith("gs://"):
        # Imported lazily so non-GCS paths do not require gcsfs to be installed.
        import gcsfs

        fs = gcsfs.GCSFileSystem(
            endpoint_url="http://localhost:4443",
            token="anon",
        )
        # Report the endpoint the filesystem session exposes, if any; otherwise
        # fall back to the endpoint that was requested above.
        base_url = getattr(getattr(fs, "session", None), "_base_url", "http://localhost:4443")
        # NOTE(review): print() in library code; consider the logging module.
        print(f"[mdio.utils] Redirecting GCS path to local fake server: {base_url}")
        storage_options["fs"] = fs

    return storage_options or None
59+
60+ # def _normalize_path(path: UPath | Path | str) -> UPath:
61+ # return UPath(path)
62+
63+
64+ # def _normalize_storage_options(path: UPath) -> dict[str, Any] | None:
65+ # return None if len(path.storage_options) == 0 else path.storage_options
3366
3467
3568def open_mdio (input_path : UPath | Path | str , chunks : T_Chunks = None ) -> xr_Dataset :
@@ -49,6 +82,8 @@ def open_mdio(input_path: UPath | Path | str, chunks: T_Chunks = None) -> xr_Dat
4982 Returns:
5083 An Xarray dataset opened from the input path.
5184 """
85+ import zarr
86+
5287 input_path = _normalize_path (input_path )
5388 storage_options = _normalize_storage_options (input_path )
5489 zarr_format = zarr .config .get ("default_zarr_format" )
@@ -61,43 +96,101 @@ def open_mdio(input_path: UPath | Path | str, chunks: T_Chunks = None) -> xr_Dat
6196 consolidated = zarr_format == ZarrFormat .V2 , # on for v2, off for v3
6297 )
6398
64-
def to_mdio(  # noqa: PLR0913
    dataset: Dataset,
    output_path: UPath | Path | str,
    mode: ZarrWriteModes | None = None,
    *,
    compute: bool = True,
    region: Mapping[str, slice | Literal["auto"]] | Literal["auto"] | None = None,
) -> None:
    """Write dataset contents to an MDIO output_path.

    Args:
        dataset: The dataset to write.
        output_path: The universal path of the output MDIO file.
        mode: Persistence mode: "w" means create (overwrite if exists)
            "w-" means create (fail if exists)
            "a" means override all existing variables including dimension coordinates (create if does not exist)
            "a-" means only append those variables that have ``append_dim``.
            "r+" means modify existing array *values* only (raise an error if any metadata or shapes would change).
            The default mode is "r+" if ``region`` is set and ``w-`` otherwise.
        compute: Forwarded to the underlying Zarr writer. If True array data is written immediately;
            otherwise the write is deferred. The writer's return value is discarded by this function.
        region: Optional mapping from dimension names to either a) ``"auto"``, or b) integer slices, indicating
            the region of existing MDIO array(s) in which to write this dataset's data.
    """
    import zarr

    output_path = _normalize_path(output_path)
    zarr_format = zarr.config.get("default_zarr_format")

    # NOTE(review): every gs:// destination is written through a local fake-GCS
    # endpoint. This looks like test-only behavior; confirm before shipping.
    if str(output_path).startswith("gs://"):
        # Imported lazily so non-GCS writes do not require gcsfs to be installed.
        import gcsfs

        fs = gcsfs.GCSFileSystem(
            endpoint_url="http://localhost:4443",
            token="anon",
        )
        base_url = getattr(getattr(fs, "session", None), "_base_url", "http://localhost:4443")
        print(f"[mdio.utils] Using fake GCS mapper via {base_url}")
        # Strip only the leading scheme; the remainder is the bucket/key for the mapper.
        store = fs.get_mapper(output_path.as_posix().removeprefix("gs://"))
        storage_options = None  # Must be None when passing a mapper
    else:
        store = output_path.as_posix()  # xarray doesn't like URI when file:// is protocol
        storage_options = _normalize_storage_options(output_path)

    # NOTE(review): print() in library code; storage options may contain
    # credentials, so consider removing this or moving it to debug logging.
    print(f"[mdio.utils] Writing to store: {store}")
    print(f"[mdio.utils] Storage options: {storage_options}")

    with zarr_warnings_suppress_unstable_structs_v3():
        xr_to_zarr(
            dataset,
            store=store,
            mode=mode,
            compute=compute,
            consolidated=zarr_format == ZarrFormat.V2,  # on for v2, off for v3
            region=region,
            storage_options=storage_options,
            write_empty_chunks=False,
        )
144+
145+
146+ # def to_mdio( # noqa: PLR0913
147+ # dataset: Dataset,
148+ # output_path: UPath | Path | str,
149+ # mode: ZarrWriteModes | None = None,
150+ # *,
151+ # compute: bool = True,
152+ # region: Mapping[str, slice | Literal["auto"]] | Literal["auto"] | None = None,
153+ # ) -> None:
154+ # """Write dataset contents to an MDIO output_path."""
155+ # import gcsfs, zarr
156+
157+ # output_path = _normalize_path(output_path)
158+
159+ # if output_path.as_posix().startswith("gs://"):
160+ # fs = gcsfs.GCSFileSystem(
161+ # endpoint_url="http://localhost:4443",
162+ # token="anon",
163+ # )
164+
165+ # base_url = getattr(getattr(fs, "session", None), "_base_url", "http://localhost:4443")
166+ # print(f"Using custom fake GCS filesystem with endpoint {base_url}")
167+
168+ # # Build a mapper so all I/O uses the fake GCS server
169+ # mapper = fs.get_mapper(output_path.as_posix().replace("gs://", ""))
170+ # store = mapper
171+ # storage_options = None # must be None when passing a mapper
172+ # else:
173+ # store = output_path.as_posix()
174+ # storage_options = _normalize_storage_options(output_path) or {}
175+
176+ # print(f"Writing to store: {store}")
177+ # zarr_format = zarr.config.get("default_zarr_format")
178+
179+ # kwargs = dict(
180+ # dataset=dataset,
181+ # store=store,
182+ # mode=mode,
183+ # compute=compute,
184+ # consolidated=zarr_format == ZarrFormat.V2,
185+ # region=region,
186+ # write_empty_chunks=False,
187+ # )
188+ # if storage_options is not None:
189+ # kwargs["storage_options"] = storage_options
190+
191+ # with zarr_warnings_suppress_unstable_structs_v3():
192+ # xr_to_zarr(**kwargs)
193+
194+
195+
196+
0 commit comments