11import logging
22import unittest .mock as mock
33from pathlib import Path
4+ from typing import Any , Optional
45
56import numpy as np
67import pytest
7- from fibad .data_sets .hsc_data_set import HSCDataSet
88from torchvision .transforms .v2 import CenterCrop , Lambda
99
10+ from fibad .data_sets .hsc_data_set import HSCDataSet
11+
1012test_dir = Path (__file__ ).parent / "test_data" / "dataloader"
1113
1214HSCDataSet ._called_from_test = True
@@ -27,8 +29,8 @@ class FakeFitsFS:
2729 more filesystem operations without a really good reason.
2830 """
2931
30- def __init__ (self , test_files : dict ):
31- self .patchers : list [mock ._patch [mock . Mock ]] = []
32+ def __init__ (self , test_files : dict , filter_catalog : Optional [ dict ] = None ):
33+ self .patchers : list [mock ._patch [Any ]] = []
3234
3335 self .test_files = test_files
3436
@@ -39,6 +41,17 @@ def __init__(self, test_files: dict):
3941 mock_fits_open = mock .Mock (side_effect = self ._open_file )
4042 self .patchers .append (mock .patch ("astropy.io.fits.open" , mock_fits_open ))
4143
44+ if filter_catalog is not None :
45+ mock_read_filter_catalog = mock .patch (
46+ "fibad.data_sets.hsc_data_set.HSCDataSet._read_filter_catalog" ,
47+ lambda x , y : "Not a real table" ,
48+ )
49+ self .patchers .append (mock_read_filter_catalog )
50+ mock_parse_filter_catalog = mock .patch (
51+ "fibad.data_sets.hsc_data_set.HSCDataSet._parse_filter_catalog" , lambda x , y : filter_catalog
52+ )
53+ self .patchers .append (mock_parse_filter_catalog )
54+
4255 def _open_file (self , filename : Path , ** kwargs ) -> mock .Mock :
4356 shape = self .test_files [filename .name ]
4457 mock_open_ctx = mock .Mock ()
@@ -117,6 +130,42 @@ def generate_files(
117130 return test_files
118131
119132
133+ def generate_filter_catalog (test_files : dict , config : dict ) -> dict :
134+ """Generates a filter catalog dict for use with FakeFitsFS from a filesystem dictionary
135+ created by generate_files.
136+
137+ This allows tests to alter the parsed filter_catalog, and interrogate what decisions HSCDataSet makes
138+ when a manifest or filter_catalog file contains corrupt information.
139+
140+ Caveats:
141+ Always Run this prior to creating any mocks, or using a FakeFitsFS
142+
143+ This function calls the HSCDataSet parsing code to actually assemble the files array, which means:
144+ 1) This test setup is fairly tightly coupled to the internals of HSCDataSet
145+ 2) When writing tests using this, ensure any functionality you depend on from the slow file-scan
146+ initialization codepath is tested indepently.
147+
148+ Parameters
149+ ----------
150+ test_files : dict
151+ Test files dictionary created with generate_files.
152+ config : dict
153+ The config you will use to initialize the test HSCDataSet object in your test.
154+ Create this with mkconfig()
155+
156+ Returns
157+ -------
158+ dict
159+ Dictionary from ObjectID -> (Dictionary from Filter -> Filename)
160+ """
161+ # Ensure the filter_catalog codepath won't be triggered
162+ config ["data_set" ]["filter_catalog" ] = False
163+
164+ # Use our initialization code to create a parsed files object
165+ with FakeFitsFS (test_files ):
166+ return HSCDataSet (config ).files
167+
168+
120169def test_load (caplog ):
121170 """Test to ensure loading a perfectly regular set of files works"""
122171 caplog .set_level (logging .WARNING )
@@ -313,6 +362,52 @@ def test_prune_size(caplog):
313362 assert "too small" in caplog .text
314363
315364
365+ def test_prune_filter_size_mismatch (caplog ):
366+ """Test to ensure images with different sizes per filter will be dropped"""
367+ caplog .set_level (logging .WARNING )
368+ test_files = {}
369+ test_files .update (generate_files (num_objects = 10 , num_filters = 5 , shape = (100 , 100 ), offset = 0 ))
370+ test_files ["00000000000000000_all_filters_HSC-R.fits" ] = (99 , 99 )
371+
372+ with FakeFitsFS (test_files ):
373+ a = HSCDataSet (mkconfig (crop_to = (99 , 99 )))
374+
375+ assert len (a ) == 9
376+ assert a .shape () == (5 , 99 , 99 )
377+
378+ # We should warn that we are dropping objects and the reason
379+ assert "Dropping object" in caplog .text
380+ assert "first filter" in caplog .text
381+
382+
383+ def test_prune_bad_filename (caplog ):
384+ """Test to ensure images with filenames set wrong will be dropped"""
385+ caplog .set_level (logging .WARNING )
386+ test_files = {}
387+ test_files .update (generate_files (num_objects = 10 , num_filters = 5 , shape = (100 , 100 ), offset = 0 ))
388+
389+ config = mkconfig (crop_to = (99 , 99 ), filter_catalog = "notarealfile.fits" )
390+
391+ # Create a filter catalog with wrong file information.
392+ filter_catalog = generate_filter_catalog (test_files , config )
393+ filters = list (filter_catalog ["00000000000000000" ].keys ())
394+ filter_catalog ["00000000000000000" ][filters [0 ]] = filter_catalog ["00000000000000001" ][filters [0 ]]
395+
396+ with FakeFitsFS (test_files , filter_catalog ):
397+ # Initialize HSCDataset exercising the filter_catalog provided initialization pathway
398+ a = HSCDataSet (config )
399+
400+ # Verify that the broken object has been dropped
401+ assert len (a ) == 9
402+
403+ # Verify the shape is correct.
404+ assert a .shape () == (5 , 99 , 99 )
405+
406+ # We should warn that we are dropping objects and the correct reason
407+ assert "Dropping object" in caplog .text
408+ assert "manifest is likely corrupt" in caplog .text
409+
410+
316411def test_partial_filter (caplog ):
317412 """Test to ensure when we only load some of the filters, only those filters end up in the dataset"""
318413 caplog .set_level (logging .WARNING )
0 commit comments