Merge pull request #629 from lincc-frameworks/lsst-band-auto-detect-fix

aritraghsh09 · web-flow · commit 5ce3d93de34b · 2026-01-27T10:57:23.000-08:00
Fix _get_available_bands_from_manifest to find complete band entries
diff --git a/pyproject.toml b/pyproject.toml
@@ -85,6 +85,7 @@ dev = [
     "sphinx-togglebutton",
     "sphinx-rtd-theme",
     "lsdb", # Used to test lsst dataset classes
+    "cdshealpix <= 0.7.1",
 ]
 
 [build-system]
diff --git a/src/hyrax/data_sets/downloaded_lsst_dataset.py b/src/hyrax/data_sets/downloaded_lsst_dataset.py
@@ -371,17 +371,33 @@ def _longest_object_id_idx(self):
         return np.argmax([len(str(id)) for id in object_ids])
 
     def _get_available_bands_from_manifest(self, manifest):
-        """Best effort to get available bands by looking at first 10 successful downloads for consistency."""
+        """Get available bands by finding entries with complete band coverage.
+
+        Uses cutout_shape[0] to determine the expected number of bands, then finds
+        entries where downloaded_bands has that many entries (i.e., complete downloads).
+        """
         if len(manifest) == 0:
             return None, None
 
-        successful_entries = []
+        # First, find the expected number of bands from cutout_shape
+        # Look for the first entry with a valid cutout_shape
+        expected_band_count = None
+        for i in range(min(len(manifest), 1000)):
+            shape = manifest["cutout_shape"][i]
+            if shape is not None and len(shape) > 0 and shape[0] > 0:
+                expected_band_count = shape[0]
+                break
+
+        if expected_band_count is None:
+            # No valid cutout_shape found
+            return None, None
 
-        # Attempt to find first 10 successful downloads.
-        # For long manifests (e.g. 1 million undownloaded cutouts), avoid iterating too far to find these 10.
+        # Now find first 5 entries where downloaded_bands has the expected count
+        complete_entries = []
         give_up_idx = min(len(manifest), 1000)
+
         for i in range(give_up_idx):
-            if len(successful_entries) >= 10:
+            if len(complete_entries) >= 5:
                 break
 
             filename = manifest["filename"][i]
@@ -395,19 +411,26 @@ def _get_available_bands_from_manifest(self, manifest):
                 and str(downloaded_bands_str).strip()
             ):
                 bands = [b.strip() for b in str(downloaded_bands_str).split(",") if b.strip()]
-                if bands:  # Non-empty band list
-                    successful_entries.append(bands)
-
-        if not successful_entries:
-            return None, None
+                # Only include entries with complete band coverage
+                if len(bands) == expected_band_count:
+                    complete_entries.append(bands)
+
+        if not complete_entries:
+            raise RuntimeError(
+                f"We checked the first 1000 manifest entries and found no entries with complete band "
+                f"coverage. Expected {expected_band_count} bands based on cutout_shape, but less than 5 "
+                f"downloaded entries have all bands present. Cannot automatically determine consistent "
+                f"band structure."
+            )
 
-        # Check that all successful entries have identical band lists
-        first_bands = successful_entries[0]
-        for i, bands in enumerate(successful_entries[1:], 1):
+        # Check that all complete entries have identical band lists
+        first_bands = complete_entries[0]
+        for i, bands in enumerate(complete_entries[1:], 1):
             if bands != first_bands:
                 raise RuntimeError(
-                    f"Inconsistent band ordering in manifest. Entry 0 has {first_bands}, "
-                    f"but entry {i} has {bands}. Cannot determine consistent band structure."
+                    f"Inconsistent band ordering in manifest among complete downloads. "
+                    f"Entry 0 has {first_bands}, but entry {i} has {bands}. "
+                    f"Cannot determine consistent band structure."
                 )
 
         return set(first_bands), first_bands
diff --git a/tests/hyrax/mocks/lsst_butler_mocks.py b/tests/hyrax/mocks/lsst_butler_mocks.py
@@ -403,9 +403,17 @@ class MockButler:
     band_fail_prob = {}
     fail_after_n = 0
     band_fail_after_n = {}
+    band_fail_before_n = {}
 
     @classmethod
-    def reset(cls, fail_prob=0.0, band_fail_prob=None, fail_after_n=0, band_fail_after_n=None):
+    def reset(
+        cls,
+        fail_prob=0.0,
+        band_fail_prob=None,
+        fail_after_n=0,
+        band_fail_after_n=None,
+        band_fail_before_n=None,
+    ):
         """Resets the mock butler for a new test, and configures failure behavior
 
         Parameters
@@ -423,12 +431,17 @@ def reset(cls, fail_prob=0.0, band_fail_prob=None, fail_after_n=0, band_fail_aft
             Continually fail particular band(s) after the provided number of calls to butler.get in the
             particular band. Dictionary provided has bands as keys and counts as values.
             Counts of zero mean no failures for that band
+        band_fail_before_n : dict, optional
+            Fail particular band(s) for the first N calls, then succeed. Dictionary provided has bands
+            as keys and counts as values. For example band_fail_before_n={"g": 5} would cause the
+            first 5 gets to g band to fail, then succeed afterwards.
         """
         cls.initialized_thread_ids = []
         cls.fail_prob = fail_prob
         cls.band_fail_prob = {} if band_fail_prob is None else band_fail_prob
         cls.fail_after_n = fail_after_n
         cls.band_fail_after_n = {} if band_fail_after_n is None else band_fail_after_n
+        cls.band_fail_before_n = {} if band_fail_before_n is None else band_fail_before_n
 
     def __init__(self, repo=None, collections=None):
         """Initialize mock butler.
@@ -441,6 +454,7 @@ def __init__(self, repo=None, collections=None):
         self._collections = collections
         self.request_count = 0
         self.band_request_count = {}
+        self.band_attempt_count = {}
 
         # Ensure only one Mock Butler per thread
         thread_id = threading.current_thread().ident
@@ -456,6 +470,12 @@ def __init__(self, repo=None, collections=None):
         self._data = {}
 
     def _generate_errors(self, rng, band):
+        # Track attempts (before any failures) for band_fail_before_n
+        if self.band_attempt_count.get(band) is None:
+            self.band_attempt_count[band] = 1
+        else:
+            self.band_attempt_count[band] += 1
+
         if MockButler.fail_after_n != 0 and self.request_count >= MockButler.fail_after_n:
             msg = f"MockButler: Simulated fail after {self.request_count} requests."
             raise RuntimeError(msg)
@@ -469,6 +489,11 @@ def _generate_errors(self, rng, band):
             msg = f"MockButler: Simulated fail after {band_limit} requests to {band} band."
             raise RuntimeError(msg)
 
+        band_fail_before = MockButler.band_fail_before_n.get(band, 0)
+        if band_fail_before != 0 and self.band_attempt_count.get(band, 0) <= band_fail_before:
+            msg = f"MockButler: Simulated fail for first {band_fail_before} requests to {band} band."
+            raise RuntimeError(msg)
+
         band_fail_prob = MockButler.band_fail_prob.get(band, 0.0)
         if rng.random() > 1.0 - band_fail_prob:
             msg = f"MockButler: Simulated fail due to band failure probability {band} = {band_fail_prob}"
diff --git a/tests/hyrax/test_downloaded_lsst_dataset.py b/tests/hyrax/test_downloaded_lsst_dataset.py
@@ -9,6 +9,7 @@
 import mocks
 import pytest
 import torch
+import torchvision  # noqa: F401  # Import before mock contexts to prevent kernel re-registration
 from mocks import lsst_config, mock_lsst_environment, sample_catalog, sample_catalog_saved  # noqa: F401
 
 from hyrax.data_sets.downloaded_lsst_dataset import DownloadedLSSTDataset
@@ -427,6 +428,39 @@ def test_failed_band_download(mock_lsst_environment, lsst_config, tmp_path):  #
     assert torch.all(cutout[2] == cutout[2])
 
 
+def test_band_detection_with_partial_downloads(mock_lsst_environment, lsst_config, tmp_path):  # noqa: F811
+    """
+    Test that _get_available_bands_from_manifest correctly identifies bands
+    from complete downloads, ignoring partial downloads that may appear earlier
+    in the manifest.
+    """
+    # Configure 4 bands
+    lsst_config["data_set"]["filters"] = ["g", "r", "i", "z"]
+
+    with mock_lsst_environment():
+        # Make g and r bands fail for the FIRST 5 downloads each, then succeed
+        # Early entries will have only i,z (partial), later entries will have all 4
+        dataset = DownloadedLSSTDatasetMocked(
+            lsst_config,
+            data_location=str(tmp_path),
+            patcher=mock_lsst_environment,
+            patcher_kwargs={"band_fail_before_n": {"g": 5, "r": 5}},
+        )
+        _manifest = dataset.download_cutouts()
+
+    # Request only g,r,i bands - triggers _get_available_bands_from_manifest
+    # which must find complete 4-band entries to determine available bands
+    lsst_config["data_set"]["filters"] = ["g", "r", "i"]
+
+    dataset = DownloadedLSSTDatasetMocked(
+        lsst_config, data_location=str(tmp_path), patcher=mock_lsst_environment
+    )
+
+    # Verify band filtering found complete entries and set up correctly
+    assert dataset._is_filtering_bands is True
+    assert set(dataset.BANDS) == {"g", "r", "i"}
+
+
 def test_catalog_ordering(mock_lsst_environment, lsst_config, tmp_path, sample_catalog):  # noqa: F811
     """
     Test that after a download the ordering of a new dataset object is given in the same order
diff --git a/tests/hyrax/test_lsst_dataset.py b/tests/hyrax/test_lsst_dataset.py
@@ -10,6 +10,7 @@
 
 import mocks
 import torch
+import torchvision  # noqa: F401  # Import before mock contexts to prevent kernel re-registration
 from mocks import lsst_config, mock_lsst_environment, sample_catalog, sample_catalog_saved  # noqa: F401
 
 

Original file line number	Diff line number	Diff line change
`@@ -85,6 +85,7 @@ dev = [`
`85`	`85`	`"sphinx-togglebutton",`
`86`	`86`	`"sphinx-rtd-theme",`
`87`	`87`	`"lsdb", # Used to test lsst dataset classes`
	`88`	`+ "cdshealpix <= 0.7.1",`
`88`	`89`	`]`
`89`	`90`
`90`	`91`	`[build-system]`