@@ -12,11 +12,16 @@
 
 from xrlint.plugins.xcube.constants import ML_FILE_PATTERN, ML_META_FILENAME
 from xrlint.plugins.xcube.plugin import plugin
-from xrlint.plugins.xcube.util import LevelsMeta, attach_dataset_level_infos, norm_path
+from xrlint.plugins.xcube.util import (
+    LevelsMeta,
+    attach_dataset_level_infos,
+    resolve_path,
+)
 from xrlint.processor import ProcessorOp
 from xrlint.result import Message
 
 level_pattern = re.compile(r"^(\d+)(?:\.zarr)?$")
+link_pattern = re.compile(r"^(\d+)\.link$")
 
 
 @plugin.define_processor("multi-level-dataset")
@@ -25,7 +30,7 @@ class MultiLevelDatasetProcessor(ProcessorOp):
 
     def preprocess(
         self, file_path: str, opener_options: dict[str, Any]
-    ) -> list[tuple[xr.Dataset, str]]:
+    ) -> list[tuple[xr.Dataset | xr.DataTree, str]]:
         fs, fs_path = get_filesystem(file_path, opener_options)
 
         file_names = [
@@ -40,18 +45,17 @@ def preprocess(
         with fs.open(f"{fs_path}/{ML_META_FILENAME}") as stream:
             meta = LevelsMeta.from_value(json.load(stream))
 
-        # check for optional ".0.link" that locates level 0 somewhere else
-        level_0_path = None
-        if "0.link" in file_names:
-            level_0_path = fs.read_text(f"{fs_path}/0.link")
+        # check for optional ".zgroup"
+        # if ".zgroup" in file_names:
+        #     with fs.open(f"{fs_path}/.zgroup") as stream:
+        #         group_props = json.load(stream)
 
-        level_names, num_levels = parse_levels(file_names, level_0_path)
+        level_paths, num_levels = parse_levels(fs, file_path, file_names)
 
         engine = opener_options.pop("engine", "zarr")
 
         level_datasets: list[xr.Dataset | None] = []
-        for level, level_name in level_names.items():
-            level_path = norm_path(f"{file_path}/{level_name}")
+        for level, level_path in level_paths.items():
             level_dataset = xr.open_dataset(level_path, engine=engine, **opener_options)
             level_datasets.append((level_dataset, level_path))
@@ -80,22 +84,30 @@ def get_filesystem(file_path: str, opener_options: dict[str, Any]):
 
 
 def parse_levels(
-    file_names: list[str], level_0_path: str | None
+    fs: fsspec.AbstractFileSystem, dataset_path: str, file_names: list[str]
 ) -> tuple[dict[int, str], int]:
-    level_names: dict[int, str] = {0: level_0_path} if level_0_path else {}
-    num_levels = 0
+    level_paths: dict[int, str] = {}
     for file_name in file_names:
+        # check for optional "<level>.link" that locates a level somewhere else
+        m = link_pattern.match(file_name)
+        if m is not None:
+            level = int(m.group(1))
+            link_path = fs.read_text(f"{dataset_path}/{file_name}")
+            level_paths[level] = resolve_path(link_path, root_path=dataset_path)
+        # check for regular "<level>.zarr"
         m = level_pattern.match(file_name)
         if m is not None:
             level = int(m.group(1))
-            level_names[level] = file_name
-            num_levels = max(num_levels, level + 1)
-    if not level_names:
+            level_paths[level] = f"{dataset_path}/{file_name}"
+
+    if not level_paths:
         raise ValueError("empty multi-level dataset")
-    num_levels = max(level_names.keys()) + 1
+
+    num_levels = max(level_paths.keys()) + 1
     for level in range(num_levels):
-        if level not in level_names:
+        if level not in level_paths:
             raise ValueError(
                 f"missing dataset for level {level} in multi-level dataset"
             )
-    return level_names, num_levels
+
+    return level_paths, num_levels
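
The two module-level patterns encode the naming convention inside a multi-level dataset directory: a level store may be named either `<level>` or `<level>.zarr`, while `<level>.link` is a small text file whose content is the (possibly relative) path of a level stored elsewhere. A minimal sanity check of what each pattern accepts:

```python
import re

level_pattern = re.compile(r"^(\d+)(?:\.zarr)?$")
link_pattern = re.compile(r"^(\d+)\.link$")

# level stores may be named "<level>" or "<level>.zarr"
assert level_pattern.match("0")
assert level_pattern.match("7.zarr")
assert not level_pattern.match("7.link")

# link files must carry the ".link" suffix, so a plain "<level>"
# directory is never mistaken for a link and read as text
assert link_pattern.match("2.link")
assert not link_pattern.match("2")
```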
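For reference, a self-contained sketch of the new `parse_levels` contract against an in-memory fsspec filesystem. The function body paraphrases the diff; the local `resolve_path` is only a stand-in with assumed semantics (join a relative link target onto the dataset root and normalize), since the real helper in `xrlint.plugins.xcube.util` is not shown here.

```python
import posixpath
import re

import fsspec

level_pattern = re.compile(r"^(\d+)(?:\.zarr)?$")
link_pattern = re.compile(r"^(\d+)\.link$")


def resolve_path(path: str, root_path: str) -> str:
    # stand-in for xrlint.plugins.xcube.util.resolve_path (assumed semantics):
    # absolute paths and URLs pass through, relative ones are joined
    # onto the dataset root and normalized
    if path.startswith("/") or "://" in path:
        return path
    return posixpath.normpath(posixpath.join(root_path, path))


def parse_levels(fs, dataset_path, file_names):
    # paraphrase of the diff's parse_levels
    level_paths = {}
    for file_name in file_names:
        # "<level>.link": the file's text content locates the level elsewhere
        m = link_pattern.match(file_name)
        if m is not None:
            link_path = fs.read_text(f"{dataset_path}/{file_name}")
            level_paths[int(m.group(1))] = resolve_path(
                link_path, root_path=dataset_path
            )
        # "<level>" or "<level>.zarr": a level stored inside the dataset
        m = level_pattern.match(file_name)
        if m is not None:
            level_paths[int(m.group(1))] = f"{dataset_path}/{file_name}"
    if not level_paths:
        raise ValueError("empty multi-level dataset")
    # levels must be contiguous from 0 .. num_levels - 1
    num_levels = max(level_paths) + 1
    for level in range(num_levels):
        if level not in level_paths:
            raise ValueError(f"missing dataset for level {level} in multi-level dataset")
    return level_paths, num_levels


fs = fsspec.filesystem("memory")
# level stores would live here; only the link file is actually read below
fs.makedirs("/demo.levels/0.zarr", exist_ok=True)
fs.makedirs("/demo.levels/1.zarr", exist_ok=True)
fs.write_text("/demo.levels/2.link", "../shared/2.zarr")

names = ["0.zarr", "1.zarr", "2.link"]
print(parse_levels(fs, "/demo.levels", names))
# ({0: '/demo.levels/0.zarr', 1: '/demo.levels/1.zarr', 2: '/shared/2.zarr'}, 3)
```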