Skip to content

Commit aa285f6

Browse files
Merge pull request #126 from crim-ca/simplify-populator-discovery
Simplify populator discovery
2 parents 2a5232d + 035c966 commit aa285f6

File tree

17 files changed

+257
-383
lines changed

17 files changed

+257
-383
lines changed

CHANGES.md

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,14 @@
22

33
## [Unreleased](https://github.com/crim-ca/stac-populator) (latest)
44

5-
<!-- insert list items of new changes here -->
5+
* Simplify populator implementation discovery and reduce boilerplate for new implementations.
6+
* Add an `update_parser_args` classmethod to the `STACpopulatorBase` class for registering additional CLI args (replaces the module-level `add_parser_args` functions)
7+
* Add a `run` classmethod to the `STACpopulatorBase` class that runs the populator based on CLI args (replaces the module-level `runner` functions)
8+
* Add shared implementations for these two classmethods to a `THREDDSPopulator` class that can be
9+
used by all populators that pull data from THREDDS
10+
* Rename `populator_base.py` to `populators.py` since it now contains more than just the base populator
11+
* No longer require new implementations to list populator modules in `implementations/__init__.py`
12+
* Implementation discovery is now done by keeping track of the concrete subclasses of the `STACpopulatorBase` class
613

714
## [0.12.0](https://github.com/crim-ca/stac-populator/tree/0.12.0) (2025-11-20)
815

STACpopulator/cli.py

Lines changed: 23 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -3,26 +3,22 @@
33
import importlib
44
import sys
55
import warnings
6-
from types import ModuleType
6+
from pathlib import Path
77
from typing import get_args
88

99
import pystac
1010
import requests
1111

12-
from STACpopulator import __version__, implementations
13-
from STACpopulator.collection_update import UpdateModes, UpdateModesOptional, update_api_collection
12+
import STACpopulator.implementations
13+
from STACpopulator import __version__
14+
from STACpopulator.collection_update import UpdateModes, update_api_collection
1415
from STACpopulator.exceptions import STACPopulatorError
1516
from STACpopulator.export import export_catalog
1617
from STACpopulator.log import add_logging_options, setup_logging
18+
from STACpopulator.populators import STACpopulatorBase
1719
from STACpopulator.request_utils import add_request_options, apply_request_options
1820

1921

20-
def _extra_parser_argument(arg: str) -> tuple[str, str]:
21-
if "=" in arg:
22-
return tuple(a.strip() for a in arg.split("=", 1))
23-
raise argparse.ArgumentTypeError("--extra-parser-arguments must be in the form 'key=value'")
24-
25-
2622
def add_parser_args(parser: argparse.ArgumentParser) -> None:
2723
"""Add parser arguments to the argument parser."""
2824
parser.add_argument(
@@ -48,52 +44,10 @@ def add_parser_args(parser: argparse.ArgumentParser) -> None:
4844
populators_subparser = run_parser.add_subparsers(
4945
title="populator", dest="populator", description="Implementation to run."
5046
)
51-
for implementation_module_name, module in implementation_modules().items():
52-
implementation_parser = populators_subparser.add_parser(implementation_module_name)
53-
module.add_parser_args(implementation_parser)
54-
implementation_parser.add_argument(
55-
"-x",
56-
"--extra-item-parsers",
57-
action="append",
58-
help="Functions that may modify items before upload. "
59-
"Should be specified in the form 'module:function_name' "
60-
"and have the signature function(item: dict, **kw)",
61-
)
62-
implementation_parser.add_argument(
63-
"-X",
64-
"--extra-collection-parsers",
65-
action="append",
66-
help="Functions that may modify collections before upload. "
67-
"Should be specified in the form 'module:function_name' or "
68-
"path/to/python/file.py:function_name. Functions should "
69-
"have the signature function(collection: dict, **kw) -> None "
70-
"and should modify the collection dict in place.",
71-
)
72-
implementation_parser.add_argument(
73-
"-a",
74-
"--extra-parser-arguments",
75-
action="append",
76-
type=_extra_parser_argument,
77-
help="Extra keyword arguments that should be passed to extra "
78-
"item and collection function as "
79-
"keyword arguments. "
80-
"Should be specified in the form 'key=value'",
81-
)
82-
implementation_parser.add_argument(
83-
"--update-collection-mode",
84-
dest="update_collection",
85-
choices=get_args(UpdateModesOptional),
86-
default="none",
87-
help="Update collection information based on new items created or updated by this populator. "
88-
"Only applies if --update is also set.",
89-
)
90-
implementation_parser.add_argument(
91-
"--exclude-summary",
92-
nargs="*",
93-
action="extend",
94-
default=[],
95-
help="Exclude these properties when updating collection summaries. ",
96-
)
47+
for name, populator in populators().items():
48+
implementation_parser = populators_subparser.add_parser(name)
49+
implementation_parser.description = getattr(populator, "description", name)
50+
populator.update_parser_args(implementation_parser)
9751
update_parser = commands_subparser.add_parser(
9852
"update-collection", description="Update collection information based on items in the collection"
9953
)
@@ -123,21 +77,24 @@ def add_parser_args(parser: argparse.ArgumentParser) -> None:
12377

12478

12579
@functools.cache
126-
def implementation_modules() -> dict[str, ModuleType]:
80+
def populators() -> dict[str, STACpopulatorBase]:
12781
"""
12882
Try to load implementations.
12983
13084
If one fails (i.e. due to missing dependencies) continue loading others.
13185
"""
132-
modules = {}
133-
for implementation_module_name in implementations.__all__:
134-
try:
135-
modules[implementation_module_name] = importlib.import_module(
136-
f".{implementation_module_name}", implementations.__package__
137-
)
138-
except STACPopulatorError as e:
139-
warnings.warn(f"Could not load extension {implementation_module_name} because of error {e}")
140-
return modules
86+
impl_path = Path(STACpopulator.implementations.__path__[0])
87+
for path in impl_path.glob("**/*.py"):
88+
if path.name == "__init__.py":
89+
path = path.parent
90+
rel_path = path.relative_to(impl_path)
91+
if str(rel_path) != ".":
92+
module_path = str(rel_path.with_suffix("")).replace("/", ".").replace("-", "_")
93+
try:
94+
importlib.import_module(f"STACpopulator.implementations.{module_path}")
95+
except STACPopulatorError as e:
96+
warnings.warn(f"Could not load extension {rel_path} because of error {e}")
97+
return {getattr(klass, "name", klass.__name__): klass for klass in STACpopulatorBase.concrete_subclasses()}
14198

14299

143100
def run(ns: argparse.Namespace) -> int:
@@ -148,7 +105,7 @@ def run(ns: argparse.Namespace) -> int:
148105
if ns.command == "run":
149106
if ns.stac_version:
150107
pystac.set_stac_version(ns.stac_version)
151-
return implementation_modules()[ns.populator].runner(ns, session) or 0
108+
return populators()[ns.populator].run(ns, session) or 0
152109
elif ns.command == "update_collection":
153110
return update_api_collection(ns.mode, ns.stac_collection_uri, ns.exclude_summary) or 0
154111
else:
Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +0,0 @@
1-
from .add_CMIP6 import add_parser_args, runner
2-
3-
__all__ = ["add_parser_args", "runner"]
Lines changed: 5 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,25 @@
1-
import argparse
21
import json
32
import logging
43
from typing import Any, MutableMapping, Union
54

65
from pystac import STACValidationError
76
from pystac.extensions.datacube import DatacubeExtension
8-
from requests.sessions import Session
97

108
from STACpopulator.extensions.cmip6 import CMIP6Helper, CMIP6Properties
119
from STACpopulator.extensions.datacube import DataCubeHelper
1210
from STACpopulator.extensions.thredds import THREDDSExtension, THREDDSHelper
13-
from STACpopulator.input import ErrorLoader, THREDDSLoader
1411
from STACpopulator.models import GeoJSONPolygon
15-
from STACpopulator.populator_base import STACpopulatorBase
12+
from STACpopulator.populators import THREDDSPopulator
1613

1714
LOGGER = logging.getLogger(__name__)
1815

1916

20-
class CMIP6populator(STACpopulatorBase):
17+
class CMIP6populator(THREDDSPopulator):
2118
"""Populator that creates STAC objects representing CMIP6 data from a THREDDS catalog."""
2219

20+
name = "CMIP6_UofT"
21+
description = "CMIP6 STAC populator from a THREDDS catalog or NCML XML."
22+
2323
item_properties_model = CMIP6Properties
2424
item_geometry_model = GeoJSONPolygon
2525

@@ -64,51 +64,3 @@ def create_stac_item(
6464

6565
# print(json.dumps(item.to_dict()))
6666
return json.loads(json.dumps(item.to_dict()))
67-
68-
69-
def add_parser_args(parser: argparse.ArgumentParser) -> None:
70-
"""Add additional CLI arguments to the argument parser."""
71-
parser.description = "CMIP6 STAC populator from a THREDDS catalog or NCML XML."
72-
parser.add_argument("stac_host", help="STAC API URL")
73-
parser.add_argument("href", help="URL to a THREDDS catalog or a NCML XML with CMIP6 metadata.")
74-
parser.add_argument("--update", action="store_true", help="Update collection and its items")
75-
parser.add_argument(
76-
"--mode",
77-
choices=["full", "single"],
78-
default="full",
79-
help="Operation mode, processing the full dataset or only the single reference.",
80-
)
81-
parser.add_argument(
82-
"--config",
83-
type=str,
84-
help=(
85-
"Override configuration file for the populator. "
86-
"By default, uses the adjacent configuration to the implementation class."
87-
),
88-
)
89-
90-
91-
def runner(ns: argparse.Namespace, session: Session) -> int:
92-
"""Run the populator."""
93-
LOGGER.info(f"Arguments to call: {vars(ns)}")
94-
95-
if ns.mode == "full":
96-
data_loader = THREDDSLoader(ns.href, session=session)
97-
else:
98-
# To be implemented
99-
data_loader = ErrorLoader()
100-
101-
c = CMIP6populator(
102-
ns.stac_host,
103-
data_loader,
104-
update=ns.update,
105-
session=session,
106-
config_file=ns.config,
107-
extra_item_parsers=ns.extra_item_parsers,
108-
extra_collection_parsers=ns.extra_collection_parsers,
109-
extra_parser_arguments=ns.extra_parser_arguments,
110-
update_collection=ns.update_collection,
111-
exclude_summaries=ns.exclude_summary,
112-
)
113-
c.ingest()
114-
return 0
Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +0,0 @@
1-
from STACpopulator.implementations.CORDEXCMIP6_Ouranos.add_CORDEX6 import add_parser_args, runner
2-
3-
__all__ = ["add_parser_args", "runner"]
Lines changed: 5 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -1,69 +1,22 @@
1-
import argparse
21
import logging
32
from typing import Any
43

5-
from requests.sessions import Session
6-
74
from STACpopulator.extensions.cordex6 import Cordex6DataModel
8-
from STACpopulator.input import ErrorLoader, THREDDSLoader
9-
from STACpopulator.populator_base import STACpopulatorBase
5+
from STACpopulator.populators import THREDDSPopulator
106

117
LOGGER = logging.getLogger(__name__)
128

139

14-
class CORDEX_STAC_Populator(STACpopulatorBase):
10+
class CORDEX_STAC_Populator(THREDDSPopulator):
1511
"""Populator that creates STAC objects representing CORDEX data from a THREDDS catalog."""
1612

13+
name = "CORDEXCMIP6_Ouranos"
14+
description = "CMIP6-CORDEX STAC populator from a THREDDS catalog or NCML XML."
15+
1716
data_model = Cordex6DataModel
1817
item_geometry_model = None # Unnecessary, but kept for consistency
1918

2019
def create_stac_item(self, item_name: str, item_data: dict[str, Any]) -> dict[str, Any]:
2120
"""Return a STAC item."""
2221
dm = self.data_model.from_data(item_data)
2322
return dm.stac_item()
24-
25-
26-
# TODO: This probably doesn't need to be copied for every implementation, right ?
27-
def add_parser_args(parser: argparse.ArgumentParser) -> None:
28-
"""Add additional CLI arguments to the argument parser."""
29-
parser.description = "CMIP6-CORDEX STAC populator from a THREDDS catalog or NCML XML."
30-
parser.add_argument("stac_host", help="STAC API URL")
31-
parser.add_argument("href", help="URL to a THREDDS catalog or a NCML XML with CMIP6 metadata.")
32-
parser.add_argument("--update", action="store_true", help="Update collection and its items")
33-
parser.add_argument(
34-
"--mode",
35-
choices=["full", "single"],
36-
default="full",
37-
help="Operation mode, processing the full dataset or only the single reference.",
38-
)
39-
parser.add_argument(
40-
"--config",
41-
type=str,
42-
help=(
43-
"Override configuration file for the populator. "
44-
"By default, uses the adjacent configuration to the implementation class."
45-
),
46-
)
47-
48-
49-
def runner(ns: argparse.Namespace, session: Session) -> int:
50-
"""Run the populator."""
51-
LOGGER.info(f"Arguments to call: {vars(ns)}")
52-
53-
if ns.mode == "full":
54-
data_loader = THREDDSLoader(ns.href, session=session)
55-
else:
56-
# To be implemented
57-
data_loader = ErrorLoader()
58-
59-
c = CORDEX_STAC_Populator(
60-
ns.stac_host,
61-
data_loader,
62-
update=ns.update,
63-
session=session,
64-
config_file=ns.config,
65-
update_collection=ns.update_collection,
66-
exclude_summaries=ns.exclude_summary,
67-
)
68-
c.ingest()
69-
return 0
Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +0,0 @@
1-
from STACpopulator.implementations.DirectoryLoader.crawl_directory import add_parser_args, runner
2-
3-
__all__ = ["add_parser_args", "runner"]

0 commit comments

Comments
 (0)