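"""Trim a reference-catalog export down to the HTM shards that overlap a set of exposures.

Exposure footprints (bbox + WCS) are gathered from DECam FITS files on disk
(--paths) and/or from datasets in a Butler repo (--repo/--dataset/--collections).
The overlapping HTM shard IDs are resolved, and the result is written either as
a new-style import ECSV file (--import-file) or as a trimmed YAML export printed
to stdout.

Example invocation (the module path, refcat alias, and file paths are illustrative):

    python -m refcats.trim_refcat gaia --paths raw/*.fits --output data/refcats
"""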
import argparse
import logging
import os

import joblib
import yaml

from .utils import (
    create_bbox_and_wcs_from_decam_fits,
    deferred_import,
    load_refcat_yaml,
    make_refcat_import,
    refcat_name_to_dataset_name,
    resolve_bbox2shard_ids,
)

logger = logging.getLogger(__name__)
logging.basicConfig(format="[%(levelname)s:%(filename)s:%(lineno)s - %(funcName)5s()] %(message)s")


def main():
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("refcat_name", type=str, help="The reference catalog name (an alias mapped to a dataset name)")
    parser.add_argument("--output", "-o", type=str, default="data/refcats", help="The output directory for the import ECSV file; the YAML export is printed to stdout")
    parser.add_argument("--paths", "-p", nargs="+", type=str, default=[], help="Paths to DECam FITS files to search")
    parser.add_argument("--repo", "-b", type=str, default=None, help="The Butler repo to search for exposures")
    parser.add_argument("--dataset", default=None, help="The dataset type name to search if using --repo")
    parser.add_argument("--collections", default=None, help="The collections to search if using --repo")
    parser.add_argument("--where", default="", help="A constraint for the dataset search if using --repo")
    parser.add_argument("--refcat_indexer", default="HTM", help="The refcat indexer (only HTM is supported)")
    parser.add_argument("--pixel_margin", type=int, default=300, help="The pixel margin for determining overlapping refcat shards")
    parser.add_argument("--log-level", default="INFO", help="The logging level, one of DEBUG, INFO, WARN, ERROR")
    parser.add_argument("--export-run", type=str, default="refcats", help="The RUN collection name to export datasets into")
    parser.add_argument("--export-dataset-name", type=str, default=None, help="The dataset type name to use for exported datasets")
    parser.add_argument("--import-file", action="store_true", help="Write a new-style import ECSV file instead of a YAML export file")
    parser.add_argument("--processes", "-J", type=int, default=8, help="Number of processes to use for opening FITS files or loading dataset refs")

    args = parser.parse_args()
    if args.repo is not None and args.dataset is None:
        raise ValueError("--dataset is required when --repo is given")
    logger.setLevel(getattr(logging, args.log_level))

    os.makedirs(args.output, exist_ok=True)
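
    # Resolve the refcat alias to its Butler dataset name and configure the
    # dataset config.  Only the HTM indexer is supported; the resolved shard
    # IDs are HTM trixel IDs (the htm7 dimension in the export data IDs).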
    # deferred_import binds the measAlgs name into this module's globals.
    deferred_import("lsst.meas.algorithms", "measAlgs", ns=globals())
    refCatConf = measAlgs.DatasetConfig()
    ref_dataset_name = refcat_name_to_dataset_name.get(args.refcat_name, None)
    if ref_dataset_name is None:
        raise ValueError(f"{args.refcat_name} is not an alias for any dataset name, use one of {list(refcat_name_to_dataset_name.keys())}")
    refCatConf.ref_dataset_name = ref_dataset_name
    if args.refcat_indexer != "HTM":
        raise ValueError(f"refcat indexer {args.refcat_indexer} is not supported")
    refCatConf.indexer = args.refcat_indexer

    if args.export_dataset_name is None:
        export_dataset_name = ref_dataset_name
    else:
        export_dataset_name = args.export_dataset_name
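
    # Gather one (bbox, wcs) footprint per detector.  FITS files and a Butler
    # repo can be combined: both branches below append to the same lists.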
    bboxes = []
    wcss = []
    if args.paths:
        def work(path):
            # joblib workers run in separate processes, so logging has to be
            # (re)configured in each worker rather than inherited from the parent.
            import logging
            logging.basicConfig()
            logger = logging.getLogger(__name__)
            logger.setLevel(getattr(logging, args.log_level))
            logger.info(f"loading fits {path}")
            return create_bbox_and_wcs_from_decam_fits(path)

        results = joblib.Parallel(n_jobs=args.processes)(joblib.delayed(work)(path) for path in args.paths)
        # create_bbox_and_wcs_from_decam_fits returns one entry per detector,
        # hence extend() rather than append().
        for bbox, wcs in results:
            bboxes.extend(bbox)
            wcss.extend(wcs)
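
    # Alternatively (or additionally), derive footprints from the wcs and
    # detector components of datasets in an existing Butler repo.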
    if args.repo:
        deferred_import("lsst.daf.butler", "dafButler", ns=globals())
        butler = dafButler.Butler(args.repo, collections=args.collections)
        refs = butler.registry.queryDatasets(args.dataset, where=args.where)

        def work(ref):
            # As above, configure logging inside the worker process.
            import logging
            logging.basicConfig()
            logger = logging.getLogger(__name__)
            logger.setLevel(getattr(logging, args.log_level))
            logger.info(f"loading dataset {ref}")

            # Read only the wcs and detector components, not the full dataset.
            wcs = butler.get(f"{args.dataset}.wcs", ref.dataId, collections=ref.run)
            bbox = butler.get(f"{args.dataset}.detector", ref.dataId, collections=ref.run).getBBox()
            return bbox, wcs

        results = joblib.Parallel(n_jobs=args.processes)(joblib.delayed(work)(ref) for ref in refs)
        for bbox, wcs in results:
            bboxes.append(bbox)
            wcss.append(wcs)
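
    # Map each footprint, padded by --pixel_margin pixels, to the set of
    # overlapping refcat shard IDs.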
    shards = []
    for bbox, wcs in zip(bboxes, wcss):
        shards.extend(resolve_bbox2shard_ids(refCatConf, bbox, wcs, pixelMargin=args.pixel_margin))

    # Deduplicate; sorted for stable, reproducible output.
    shards = sorted(set(shards))
    logger.info("shards: %s", shards)
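
    # Two output modes: write a new-style ECSV import file into --output, or
    # print a trimmed copy of the full YAML export to stdout.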
    if args.import_file:
        import_table = make_refcat_import(ref_dataset_name, shards, args.output)
        import_table.write(os.path.join(args.output, ref_dataset_name + ".ecsv"), format="ascii.ecsv")
    else:
        # Load the full YAML export and trim it to just the chosen shards.
        logger.info(f"loading refcat for {ref_dataset_name}")
        refcat = load_refcat_yaml(ref_dataset_name)
        datasets = next(d for d in refcat['data'] if d['type'] == "dataset")
        collection = next(d for d in refcat['data'] if d['type'] == "collection")
        dataset_type = next(d for d in refcat['data'] if d['type'] == "dataset_type")
        collection['name'] = args.export_run
        dataset_type['name'] = export_dataset_name
        logger.info("trimming records")
        # The export's data IDs are keyed by the htm7 dimension (HTM depth 7).
        records = [rec for rec in datasets['records'] if rec['data_id'][0]['htm7'] in shards]
        datasets['records'] = records
        datasets['run'] = args.export_run
        datasets['dataset_type'] = export_dataset_name
        refcat['data'] = [collection, dataset_type, datasets]

        print(yaml.dump(refcat))


if __name__ == "__main__":
    main()