Add: template cli

Lars-Kraemer · Lars-Kraemer · commit adf305307207 · 2025-09-18T12:02:30.000+02:00
diff --git a/README.md b/README.md
@@ -424,6 +424,7 @@ print(fm[0])                      # > data/stacked_images/case_1_image | gives t
 - Config is a YAML **template for datasets**: always starts with a `name`.
 - Add one or more **layers** (e.g., `image`, `semseg`, `multilabel`) with fields like `path`, `file_type`, `channels`/`classes`.
 - **Optional splits**: define `train/val/test` overrides or point to a `splits_file`.
+- See [`examples/template.yaml`](examples/template.yaml) for a reference template, or run `vidata_template` to generate one interactively.
 
 <details>
 <summary> Expand for Full Details </summary>
@@ -537,17 +538,17 @@ print("Classes in labels:", tm.class_ids(arr))
 
 # Data Analysis
 
-The `data_analyze` CLI computes dataset statistics and writes them to the
+The `vidata_analyze` CLI computes dataset statistics and writes them to the
 specified output directory. Results include:
 
 - **Image statistics**: sizes, resolutions, intensity distributions
 - **Label statistics**: class counts, frequencies, co-occurrence
 - **Split summaries**: optional per-split analysis
 
 ```bash
-data_analyze -c path/to/datasets/*.yaml  -o <outputdir>
+vidata_analyze -c path/to/datasets/*.yaml  -o <outputdir>
 # Analyze a specific split/fold
-data_analyze -c path/to/datasets/*.yaml  -o <outputdir> -s <split> -f <fold>
+vidata_analyze -c path/to/datasets/*.yaml  -o <outputdir> -s <split> -f <fold>
 ```
 
 # Data Inspection
@@ -562,9 +563,9 @@ pip install napari-data-inspection[all]
 Run the following
 
 ```bash
-data_inspections -c path/to/datasets/*.yaml
+data_inspection -c path/to/datasets/*.yaml
 # Inspect a specific split/fold
-data_inspections -c path/to/datasets/*.yaml  -s <split> -f <fold>
+data_inspection -c path/to/datasets/*.yaml  -s <split> -f <fold>
 ```
 
 # Acknowledgments
diff --git a/pyproject.toml b/pyproject.toml
@@ -120,3 +120,8 @@ fix = true
 [tool.pytest.ini_options]
 addopts = "--basetemp=./tests/temp/"
 testpaths = ["tests"]
+
+
+[project.scripts]
+vidata_analyze = "vidata.cli.analyze:main"
+vidata_template = "vidata.cli.template:main"
diff --git a/src/vidata/cli/template.py b/src/vidata/cli/template.py
@@ -0,0 +1,104 @@
+from pathlib import Path
+
+from omegaconf import OmegaConf
+
+
+def _ask_int(prompt: str) -> int | None:
+    """Prompt for a whole number; return ``None`` when the answer is left empty."""
+    raw = input(prompt).strip()
+    if raw == "":
+        return None
+    try:
+        return int(raw)
+    except ValueError:
+        raise ValueError(f"Expected a whole number, got {raw!r}") from None
+
+
+def main():
+    """Interactively build a dataset config template and save it as YAML.
+
+    Answers are read from stdin; the file is written to ``<cwd>/<project name>.yaml``.
+    Unanswered layer counts are treated as zero; an unanswered file type, channel
+    count or class count is written as ``null``.
+
+    Raises:
+        ValueError: if the project name is empty or a numeric answer is not an integer.
+        FileExistsError: if the output file exists and overwriting is not confirmed.
+    """
+    print("=== YAML Template Creator ===")
+    project_name = input("Project Name: ")
+    if project_name == "":
+        raise ValueError("Project Name cannot be empty")
+
+    output_path = Path.cwd() / (project_name + ".yaml")
+    if output_path.exists() and input("Project exists, overwrite? (Y/N)").lower() != "y":
+        raise FileExistsError(f"Output path already exists: {output_path}")
+
+    # An empty answer means "no layers of this kind"; range() below needs a real int.
+    n_ilayers = _ask_int("Number of Image layers: ") or 0
+    n_llayers = _ask_int("Number of Label layers: ") or 0
+
+    f_type = input("File Type (e.g. .nii.gz, .png): ") or None
+
+    if n_ilayers > 0:
+        n_channels = _ask_int("Number of Image Channels: ")
+    else:
+        n_channels = "TODO"
+
+    task = "TODO - semseg|multilabel"
+    if n_llayers > 0:
+        n_classes = _ask_int("Number of Label Classes: ")
+        answer = input("Semantic Segmentation(S)/MultilabelSegmentation(M): ").lower()
+        task = {"s": "semseg", "m": "multilabel"}.get(answer, task)
+    else:
+        n_classes = "TODO"
+
+    split = input("Create Split Template (Y/N): ").lower() == "y"
+
+    config = {"name": project_name}
+    layers_i = [
+        {
+            "name": f"ImageLayer{i + 1}",
+            "type": "image",  # change to "labels"/"points" if needed
+            "path": "TODO",
+            "file_type": f_type,
+            "pattern": None,
+            "backend": None,
+            # NOTE(review): README mentions `channels` - confirm this key name
+            "channel": n_channels,  # optional
+            "file_stack": False,
+        }
+        for i in range(n_ilayers)
+    ]
+    layers_l = [
+        {
+            "name": f"LabelLayer{i + 1}",
+            "type": task,
+            "path": "TODO",
+            "file_type": f_type,
+            "pattern": None,
+            "backend": None,
+            "classes": n_classes,
+            "file_stack": False,
+            "ignore_bg": None,
+            "ignore_index": None,
+        }
+        for i in range(n_llayers)
+    ]
+    config["layers"] = layers_i + layers_l
+
+    if split:
+        layer_names = [layer["name"] for layer in config["layers"]]
+        config["split"] = {"splits_file": None}
+        # Give every split its own mapping instead of sharing one dict object.
+        for split_name in ("train", "val", "test"):
+            config["split"][split_name] = dict.fromkeys(layer_names)
+
+    OmegaConf.save(config, output_path)
+    print(f"✔ Wrote template to: {output_path}")
+    print(" - Fill out all 'TODO'")
+    print(" - Optional - rename the layers")
+    print(" - Optional - 'null' entries are optional, you can change or delete them")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/src/vidata/cli/verify_config.py b/src/vidata/cli/verify_config.py
diff --git a/src/vidata/config_manager.py b/src/vidata/config_manager.py
@@ -236,7 +236,7 @@ def config(self, split: str | None = None, fold: int | None = None):
             _cfg["include_names"] = self.resolve_splits_file(split, fold)
         return _cfg
 
-    def resolve_splits_file(self, split: str, fold: int | None = None):
+    def resolve_splits_file(self, split: str, fold: int | None = None) -> list[str]:
         if self.splits_file is None:
             raise ValueError(f"no splits file defined for {self.name}")
         splits = load_json(self.splits_file)
@@ -254,7 +254,10 @@ def resolve_splits_file(self, split: str, fold: int | None = None):
 
         if split not in splits:
             raise ValueError(f"split {split} is not in splits_file with keys {list(splits.keys())}")
-        return splits[split]
+
+        resolved = splits[split]
+        assert isinstance(resolved, list)  # Should be a list of files
+        return resolved
 
     def file_manager(self, split: str | None = None, fold: int | None = None) -> FileManager:
         _cfg = self.config(split=split)
@@ -319,8 +322,8 @@ def task_manager(self) -> TaskManager:
 
 
 class ConfigManager:
-    def __init__(self, config: dict | DictConfig | str):
-        if isinstance(config, str):
+    def __init__(self, config: dict | DictConfig | str | Path):
+        if isinstance(config, (str | Path)):
             self.config = OmegaConf.load(config)
         else:
             self.config = config
@@ -353,24 +356,3 @@ def layer_names(self):
 
     def __len__(self):
         return len(self.layers)
-
-
-if __name__ == "__main__":
-
-    path = "../../../dataset_cfg/Cityscapes.yaml"
-    cfg = dict(OmegaConf.load(path))
-    print(cfg)
-    cm = ConfigManager(cfg)
-    for key in cm.layer_names():
-        layer = cm.layer(key)
-        fm = layer.file_manager()
-        for i in fm:
-            print(i)
-        break
-        # for layer in cm.layers:
-        # for split in ["train", "val"]:
-        #     fm=layer.file_manager(split,fold=0)
-        #     print(layer.name,split,len(fm))
-        #     dalo=layer.data_loader()
-        #     data,meta=dalo.load(fm[0])
-        #     print(layer.name, split, data.shape)