55# https://opensource.org/licenses/MIT.
66
77from pathlib import Path
8+ from typing import Literal , Optional , Tuple
89
910import click
11+ import yaml
1012
1113from deep_code .tools .publish import Publisher
1214
15+ Mode = Literal ["all" , "dataset" , "workflow" ]
1316
14- def _validate_inputs (dataset_config , workflow_config , mode ):
# Top-level YAML keys counted as evidence that a config file describes a
# dataset. Frozen so the detection heuristic cannot be changed by accidental
# in-place mutation at runtime; set intersection (`keys & DATASET_MARKERS`)
# works unchanged.
DATASET_MARKERS = frozenset(
    {
        "stac_version",
        "extent",
        "license",
        "summaries",
        "assets",
        "providers",
        "collection",
        "collection_id",
        "id",
    }
)
# Top-level YAML keys counted as evidence that a config file describes a
# workflow/experiment. Frozen for the same reason as DATASET_MARKERS.
WORKFLOW_MARKERS = frozenset(
    {
        "workflow",
        "workflow_id",
        "workflow_title",
        "experiment",
        "jupyter_notebook_url",
        "notebook",
        "parameters",
        "input_datasets",
    }
)
38+
39+
def _validate_inputs(
    dataset_config: Optional[str], workflow_config: Optional[str], mode: str
):
    """Check that the config files required by ``mode`` were given and exist.

    ``mode`` is compared case-insensitively. ``dataset`` requires the dataset
    config, ``workflow`` requires the workflow config, and ``all`` requires
    both (dataset checked first).

    Raises:
        click.UsageError: if ``mode`` is not one of the three known values,
            or a required path is ``None`` or does not point to a file.
    """
    normalized = mode.lower()

    supplied = {
        "DATASET_CONFIG": dataset_config,
        "WORKFLOW_CONFIG": workflow_config,
    }
    # Labels of the configs each mode needs, in the order they are checked.
    required_by_mode = {
        "dataset": ("DATASET_CONFIG",),
        "workflow": ("WORKFLOW_CONFIG",),
        "all": ("DATASET_CONFIG", "WORKFLOW_CONFIG"),
    }

    if normalized not in required_by_mode:
        raise click.UsageError("Invalid mode. Choose one of: all, dataset, workflow.")

    for label in required_by_mode[normalized]:
        location = supplied[label]
        if location is None:
            raise click.UsageError(f"{label} is required but was not provided.")
        if not Path(location).is_file():
            raise click.UsageError(f"{label} not found: {location}")

    # Only dataset mode reports an unused extra file; the note goes to stderr.
    if normalized == "dataset" and workflow_config is not None:
        click.echo("Ignoring WORKFLOW_CONFIG since mode=dataset.", err=True)
65+
66+
def _detect_config_type(path: Path) -> Literal["dataset", "workflow"]:
    """Detect config type via filename hints and YAML top-level keys.

    The file name (not its directory) is checked first, case-insensitively:
    a name containing "workflow" or "experiment" is a workflow config, and
    otherwise one containing "dataset" or "collection" is a dataset config.
    Workflow hints are tested first, so they win when a name contains both.

    Without a filename hint the file is parsed as YAML and its top-level keys
    are counted against DATASET_MARKERS and WORKFLOW_MARKERS; the kind with
    more matches is returned.

    Raises:
        ValueError: if the file cannot be read or parsed (the underlying
            error is chained as ``__cause__``), if the top level of the YAML
            is not a mapping, or if both kinds match equally often.
    """
    name = path.name.lower()
    if "workflow" in name or "experiment" in name:
        return "workflow"
    if "dataset" in name or "collection" in name:
        return "dataset"

    try:
        # Read first so that an I/O failure, not the parser, is the cause.
        text = path.read_text(encoding="utf-8")
        data = yaml.safe_load(text)
    except Exception as e:
        # Deliberately broad: any read/parse failure is reported uniformly,
        # with the original exception chained for debugging.
        raise ValueError(f"Cannot read YAML from {path}: {e}") from e

    if not isinstance(data, dict):
        raise ValueError(f"YAML in {path} must be a mapping/object at the top level.")

    keys = set(data)
    ds_score = len(keys & DATASET_MARKERS)
    wf_score = len(keys & WORKFLOW_MARKERS)

    if ds_score > wf_score:
        return "dataset"
    if wf_score > ds_score:
        return "workflow"

    # Equal scores (including zero matches on both sides) cannot be resolved.
    raise ValueError(
        f"Ambiguous config type for {path}. "
        "Rename to include 'dataset' or 'workflow', or pass the missing file explicitly."
    )
96+
97+
def _assign_configs(
    pos_first: Optional[str],
    pos_second: Optional[str],
    mode: Mode,
    explicit_dataset: Optional[str],
    explicit_workflow: Optional[str],
) -> Tuple[Optional[str], Optional[str]]:
    """Decide which supplied file is the dataset config and which the workflow config.

    Precedence: explicit flags > positional paths + detection.

    Args:
        pos_first: first positional path, if any.
        pos_second: second positional path, if any.
        mode: publishing mode; used as a hint when a single positional file
            has to be placed.
        explicit_dataset: path given via --dataset-config, if any.
        explicit_workflow: path given via --workflow-config, if any.

    Returns:
        ``(dataset_config, workflow_config)``; either element may be ``None``
        when no file could be assigned to that slot.

    Raises:
        click.UsageError: on too many positional files, two files of the same
            kind, or when a file's kind cannot be detected.
    """
    ds = explicit_dataset
    wf = explicit_workflow
    pos_args = [p for p in (pos_first, pos_second) if p]

    def _detect(p: str) -> str:
        # A file whose kind cannot be determined is a user-input problem, so
        # report it like every other one here instead of leaking a traceback.
        try:
            return _detect_config_type(Path(p))
        except ValueError as e:
            raise click.UsageError(str(e)) from e

    # Both explicit flags given: nothing to infer; positionals are ignored.
    if ds and wf:
        if pos_args:
            click.echo(
                "Positional config paths ignored because explicit flags were provided.",
                err=True,
            )
        return ds, wf

    # Exactly one explicit flag: at most one positional may fill the other slot.
    if ds or wf:
        if len(pos_args) > 1:
            if ds:
                raise click.UsageError(
                    "Provide at most one positional file when using --dataset-config."
                )
            raise click.UsageError(
                "Provide at most one positional file when using --workflow-config."
            )
        if pos_args:
            p = pos_args[0]
            # Mode is a strong hint when it names the slot that is still empty.
            if not ds and mode == "dataset":
                ds = p
            elif not wf and mode == "workflow":
                wf = p
            elif _detect(p) == "dataset":
                # Same file given twice (flag + positional) is tolerated.
                if ds and Path(ds).resolve() != Path(p).resolve():
                    raise click.UsageError(
                        f"Multiple dataset configs supplied: {ds} and {p}"
                    )
                ds = p
            else:
                if wf and Path(wf).resolve() != Path(p).resolve():
                    raise click.UsageError(
                        f"Multiple workflow configs supplied: {wf} and {p}"
                    )
                wf = p
        return ds, wf

    # No explicit flags: rely on positionals + detection.
    if not pos_args:
        return None, None

    if len(pos_args) == 1:
        p = pos_args[0]
        if mode == "dataset":
            return p, None
        if mode == "workflow":
            return None, p
        # mode == "all": detect; the missing counterpart is reported later
        # by input validation.
        return (p, None) if _detect(p) == "dataset" else (None, p)

    # Two positionals: detect both and assign.
    p1, p2 = pos_args
    k1 = _detect(p1)
    k2 = _detect(p2)
    if k1 == k2:
        raise click.UsageError(
            f"Both files look like '{k1}' configs: {p1} and {p2}. "
            "Please rename one or use --dataset-config/--workflow-config."
        )
    return (p1, p2) if k1 == "dataset" else (p2, p1)
42196
43197
44198@click .command (name = "publish" )
45- @click .argument ("dataset_config" , type = click .Path (exists = True ))
46- @click .argument ("workflow_config" , type = click .Path (exists = True ))
199+ @click .argument ("dataset_config" , type = click .Path (exists = True ), required = False )
200+ @click .argument ("workflow_config" , type = click .Path (exists = True ), required = False )
201+ @click .option (
202+ "--dataset-config" ,
203+ "dataset_config_opt" ,
204+ type = click .Path (exists = True ),
205+ help = "Explicit path to dataset config (overrides positional detection)." ,
206+ )
207+ @click .option (
208+ "--workflow-config" ,
209+ "workflow_config_opt" ,
210+ type = click .Path (exists = True ),
211+ help = "Explicit path to workflow config (overrides positional detection)." ,
212+ )
47213@click .option (
48214 "--environment" ,
49215 "-e" ,
@@ -58,14 +224,41 @@ def ensure_file(path: str, label: str):
58224 default = "all" ,
59225 help = "Publishing mode: dataset only, workflow only, or both" ,
60226)
61- def publish (dataset_config , workflow_config , environment , mode ):
62- """Request publishing a dataset along with experiment and workflow metadata to the
63- open science catalogue.
227+ def publish (
228+ dataset_config ,
229+ workflow_config ,
230+ dataset_config_opt ,
231+ workflow_config_opt ,
232+ environment ,
233+ mode ,
234+ ):
64235 """
236+ Publish dataset and/or workflow/experiment metadata.
237+
238+ Examples:
239+ deep-code publish workflow.yaml -e staging -m workflow
240+ deep-code publish dataset.yaml -e staging -m dataset
241+ deep-code publish dataset.yaml workflow.yaml -m all
242+ deep-code publish --dataset-config dataset.yaml --workflow-config wf.yaml -m all
243+ deep-code publish --dataset-config dataset.yaml -m dataset
244+ deep-code publish --workflow-config wf.yaml -m workflow
245+ """
246+ mode = mode .lower ()
247+ ds_path , wf_path = _assign_configs (
248+ dataset_config ,
249+ workflow_config ,
250+ mode , # type: ignore[arg-type]
251+ dataset_config_opt ,
252+ workflow_config_opt ,
253+ )
254+
255+ _validate_inputs (ds_path , wf_path , mode )
256+
65257 publisher = Publisher (
66- dataset_config_path = dataset_config ,
67- workflow_config_path = workflow_config ,
258+ dataset_config_path = ds_path ,
259+ workflow_config_path = wf_path ,
68260 environment = environment .lower (),
69261 )
70- result = publisher .publish (mode = mode .lower ())
71- click .echo (f"Pull request created: { result } " )
262+ result = publisher .publish (mode = mode )
263+
264+ click .echo (result if isinstance (result , str ) else "Wrote files locally." )
0 commit comments