Commit 62ee7bf

enhanced cli to auto detect the config type if not explicitly mentioned
1 parent d54cba2 commit 62ee7bf
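
This commit lets `deep-code publish` work out which YAML file is the dataset config and which is the workflow config when the caller does not say so: filename hints are tried first, then the file's top-level YAML keys are scored against two marker sets, and explicit --dataset-config/--workflow-config flags always take precedence. A condensed sketch of the scoring idea (not the code added below; the marker sets are abbreviated and the example keys are hypothetical):

# Condensed sketch of the detection heuristic introduced in this commit.
# Score a config's top-level YAML keys against two marker sets and pick
# whichever side matches more; a tie is treated as ambiguous.
DATASET_MARKERS = {"stac_version", "extent", "license", "collection_id", "id"}
WORKFLOW_MARKERS = {"workflow_id", "experiment", "jupyter_notebook_url", "parameters"}


def guess_kind(top_level_keys):
    ds_score = len(set(top_level_keys) & DATASET_MARKERS)
    wf_score = len(set(top_level_keys) & WORKFLOW_MARKERS)
    if ds_score > wf_score:
        return "dataset"
    if wf_score > ds_score:
        return "workflow"
    raise ValueError("ambiguous config type")


# Hypothetical keys from a workflow config: two workflow markers, no dataset markers.
print(guess_kind({"workflow_id", "parameters", "osc_themes"}))  # -> workflow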

2 files changed: +212 -17 lines

deep_code/cli/publish.py

Lines changed: 209 additions & 16 deletions
@@ -5,45 +5,211 @@
 # https://opensource.org/licenses/MIT.
 
 from pathlib import Path
+from typing import Literal, Optional, Tuple
 
 import click
+import yaml
 
 from deep_code.tools.publish import Publisher
 
+Mode = Literal["all", "dataset", "workflow"]
 
-def _validate_inputs(dataset_config, workflow_config, mode):
+DATASET_MARKERS = {
+    "stac_version",
+    "extent",
+    "license",
+    "summaries",
+    "assets",
+    "providers",
+    "collection",
+    "collection_id",
+    "id",
+}
+WORKFLOW_MARKERS = {
+    "workflow",
+    "workflow_id",
+    "workflow_title",
+    "experiment",
+    "jupyter_notebook_url",
+    "notebook",
+    "parameters",
+    "input_datasets",
+}
+
+
+def _validate_inputs(
+    dataset_config: Optional[str], workflow_config: Optional[str], mode: str
+):
     mode = mode.lower()
 
-    def ensure_file(path: str, label: str):
+    def ensure_file(path: Optional[str], label: str):
         if path is None:
             raise click.UsageError(f"{label} is required but was not provided.")
         if not Path(path).is_file():
             raise click.UsageError(f"{label} not found: {path}")
 
     if mode == "dataset":
-        # Need dataset only
         ensure_file(dataset_config, "DATASET_CONFIG")
         if workflow_config is not None:
-            click.echo("ℹ️ Ignoring WORKFLOW_CONFIG since mode=dataset.", err=True)
+            click.echo("Ignoring WORKFLOW_CONFIG since mode=dataset.", err=True)
 
     elif mode == "workflow":
-        # Need workflow config only
         ensure_file(workflow_config, "WORKFLOW_CONFIG")
 
     elif mode == "all":
-        # Need both
         ensure_file(dataset_config, "DATASET_CONFIG")
         ensure_file(workflow_config, "WORKFLOW_CONFIG")
 
     else:
+        raise click.UsageError("Invalid mode. Choose one of: all, dataset, workflow.")
+
+
+def _detect_config_type(path: Path) -> Literal["dataset", "workflow"]:
+    """Detect config type via filename hints and YAML top-level keys."""
+    name = path.name.lower()
+    if "workflow" in name or "experiment" in name:
+        return "workflow"
+    if "dataset" in name or "collection" in name:
+        return "dataset"
+
+    try:
+        data = yaml.safe_load(path.read_text(encoding="utf-8"))
+    except Exception as e:
+        raise ValueError(f"Cannot read YAML from {path}: {e}")
+
+    if not isinstance(data, dict):
+        raise ValueError(f"YAML in {path} must be a mapping/object at the top level.")
+
+    keys = set(data.keys())
+    ds_score = len(keys & DATASET_MARKERS)
+    wf_score = len(keys & WORKFLOW_MARKERS)
+
+    if ds_score > wf_score:
+        return "dataset"
+    if wf_score > ds_score:
+        return "workflow"
+
+    raise ValueError(
+        f"Ambiguous config type for {path}. "
+        "Rename to include 'dataset' or 'workflow', or pass the missing file explicitly."
+    )
+
+
+def _assign_configs(
+    pos_first: Optional[str],
+    pos_second: Optional[str],
+    mode: Mode,
+    explicit_dataset: Optional[str],
+    explicit_workflow: Optional[str],
+) -> Tuple[Optional[str], Optional[str]]:
+    """
+    Decide which file is dataset vs workflow.
+    Precedence: explicit flags > positional + detection.
+    Returns (dataset_config, workflow_config).
+    """
+    ds = explicit_dataset
+    wf = explicit_workflow
+
+    # If both explicit provided, we're done; warn if extra positionals are passed.
+    pos_args = [p for p in (pos_first, pos_second) if p]
+    if ds and wf:
+        if pos_args:
+            click.echo(
+                "Positional config paths ignored because explicit flags were provided.",
+                err=True,
+            )
+        return ds, wf
+
+    # Helper to assign a single positional file to the missing slot
+    def _assign_single(p: str):
+        nonlocal ds, wf
+        if ds and wf:
+            raise click.UsageError(
+                "Both dataset and workflow configs already provided; remove extra positional files."
+            )
+        # Use mode as a strong hint when only one is missing
+        if not ds and mode == "dataset":
+            ds = p
+            return
+        if not wf and mode == "workflow":
+            wf = p
+            return
+        # Otherwise detect
+        kind = _detect_config_type(Path(p))
+        if kind == "dataset":
+            if ds and Path(ds).resolve() != Path(p).resolve():
+                raise click.UsageError(
+                    f"Multiple dataset configs supplied: {ds} and {p}"
+                )
+            ds = p
+        else:
+            if wf and Path(wf).resolve() != Path(p).resolve():
+                raise click.UsageError(
+                    f"Multiple workflow configs supplied: {wf} and {p}"
+                )
+            wf = p
+
+    # If exactly one explicit provided, try to fill the other via positionals
+    if ds and not wf:
+        if len(pos_args) > 1:
+            raise click.UsageError(
+                "Provide at most one positional file when using --dataset-config."
+            )
+        if pos_args:
+            _assign_single(pos_args[0])
+        return ds, wf
+
+    if wf and not ds:
+        if len(pos_args) > 1:
+            raise click.UsageError(
+                "Provide at most one positional file when using --workflow-config."
+            )
+        if pos_args:
+            _assign_single(pos_args[0])
+        return ds, wf
+
+    # No explicit flags: rely on positionals + detection
+    if not pos_args:
+        return None, None
+    if len(pos_args) == 1:
+        p = pos_args[0]
+        if mode == "dataset":
+            return p, None
+        if mode == "workflow":
+            return None, p
+        # mode == "all": detect and require the other later in validation
+        kind = _detect_config_type(Path(p))
+        return (p, None) if kind == "dataset" else (None, p)
+
+    # Two positionals: detect both and assign
+    p1, p2 = pos_args[0], pos_args[1]
+    k1 = _detect_config_type(Path(p1))
+    k2 = _detect_config_type(Path(p2))
+    if k1 == k2:
         raise click.UsageError(
-            "Invalid mode. Choose one of: all, dataset, workflow_experiment."
+            f"Both files look like '{k1}' configs: {p1} and {p2}. "
+            "Please rename one or use --dataset-config/--workflow-config."
         )
+    ds = p1 if k1 == "dataset" else p2
+    wf = p1 if k1 == "workflow" else p2
+    return ds, wf
 
 
 @click.command(name="publish")
-@click.argument("dataset_config", type=click.Path(exists=True))
-@click.argument("workflow_config", type=click.Path(exists=True))
+@click.argument("dataset_config", type=click.Path(exists=True), required=False)
+@click.argument("workflow_config", type=click.Path(exists=True), required=False)
+@click.option(
+    "--dataset-config",
+    "dataset_config_opt",
+    type=click.Path(exists=True),
+    help="Explicit path to dataset config (overrides positional detection).",
+)
+@click.option(
+    "--workflow-config",
+    "workflow_config_opt",
+    type=click.Path(exists=True),
+    help="Explicit path to workflow config (overrides positional detection).",
+)
 @click.option(
     "--environment",
     "-e",
@@ -58,14 +224,41 @@ def ensure_file(path: str, label: str):
     default="all",
     help="Publishing mode: dataset only, workflow only, or both",
 )
-def publish(dataset_config, workflow_config, environment, mode):
-    """Request publishing a dataset along with experiment and workflow metadata to the
-    open science catalogue.
+def publish(
+    dataset_config,
+    workflow_config,
+    dataset_config_opt,
+    workflow_config_opt,
+    environment,
+    mode,
+):
     """
+    Publish dataset and/or workflow/experiment metadata.
+
+    Examples:
+        deep-code publish workflow.yaml -e staging -m workflow
+        deep-code publish dataset.yaml -e staging -m dataset
+        deep-code publish dataset.yaml workflow.yaml -m all
+        deep-code publish --dataset-config dataset.yaml --workflow-config wf.yaml -m all
+        deep-code publish --dataset-config dataset.yaml -m dataset
+        deep-code publish --workflow-config wf.yaml -m workflow
+    """
+    mode = mode.lower()
+    ds_path, wf_path = _assign_configs(
+        dataset_config,
+        workflow_config,
+        mode,  # type: ignore[arg-type]
+        dataset_config_opt,
+        workflow_config_opt,
+    )
+
+    _validate_inputs(ds_path, wf_path, mode)
+
     publisher = Publisher(
-        dataset_config_path=dataset_config,
-        workflow_config_path=workflow_config,
+        dataset_config_path=ds_path,
+        workflow_config_path=wf_path,
         environment=environment.lower(),
     )
-    result = publisher.publish(mode=mode.lower())
-    click.echo(f"Pull request created: {result}")
+    result = publisher.publish(mode=mode)
+
+    click.echo(result if isinstance(result, str) else "Wrote files locally.")
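
A quick way to see the new behaviour: the two positional configs can now be given in either order. The sketch below is hypothetical (made-up file names, minimal YAML contents, and it assumes the deep_code package at this commit is importable); _assign_configs should detect each file's type from its top-level keys and hand the paths back in (dataset, workflow) order:

from pathlib import Path

from deep_code.cli.publish import _assign_configs

# Hypothetical minimal configs; neither filename contains a type hint,
# so detection falls back to scoring the YAML top-level keys.
Path("wf.yaml").write_text("workflow_id: demo\nparameters: {}\n", encoding="utf-8")
Path("ds.yaml").write_text("collection_id: demo\nlicense: MIT\n", encoding="utf-8")

# Positional order deliberately reversed, no explicit flags given (None, None).
ds, wf = _assign_configs("wf.yaml", "ds.yaml", "all", None, None)
print(ds, wf)  # expected: ds.yaml wf.yaml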

deep_code/tools/publish.py

Lines changed: 3 additions & 1 deletion
@@ -475,7 +475,9 @@ def publish(
             files.update(ds_files)
 
         if mode in ("workflow", "all"):
-            wf_files = self.generate_workflow_experiment_records(write_to_file=False)
+            wf_files = self.generate_workflow_experiment_records(
+                write_to_file=False, mode=mode
+            )
             files.update(wf_files)
 
         if not files:
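
On the tools side, the only change is that the publishing mode is now threaded through to generate_workflow_experiment_records. What the CLI ultimately drives, after detection and validation, boils down to the call below; a sketch with placeholder paths that mirrors the CLI code above (a real run still needs valid configs and whatever credentials Publisher requires):

from deep_code.tools.publish import Publisher

# Placeholder paths; in the CLI these are the detected and validated configs.
publisher = Publisher(
    dataset_config_path="dataset.yaml",
    workflow_config_path="workflow.yaml",
    environment="staging",
)
result = publisher.publish(mode="all")

# The CLI prints the result when the publisher returns a string (previously a
# pull-request reference), otherwise a note that files were written locally.
print(result if isinstance(result, str) else "Wrote files locally.")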
