Skip to content

Commit 82126e1

Browse files
Add additional cuts and update the module to operate on higher resolution (#675)
1 parent 6cde531 commit 82126e1

File tree

8 files changed

+223
-36
lines changed

8 files changed

+223
-36
lines changed
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
{
2+
"minimum_number_of_alerts": 3,
3+
"cutout_timeframe": 30,
4+
"cutout_magnitude": 20
5+
}
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
{
2+
"image_shape": [30, 30],
3+
"sigma_clipping_kwargs": {
4+
"sigma": 3,
5+
"maxiters": 10
6+
},
7+
"hostless_detection_with_clipping": {
8+
"crop_radius": 15,
9+
"max_number_of_pixels_clipped": 5,
10+
"min_number_of_pixels_clipped": 3
11+
}
12+
}

fink_science/rubin/hostless_detection/pipeline_utils.py

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,17 @@
1515
"""Implementation of the paper: ELEPHANT: ExtragaLactic alErt Pipeline for Hostless AstroNomical Transients https://arxiv.org/abs/2404.18165"""
1616

1717
import io
18+
from typing import Dict
1819

1920
from astropy.io import fits
2021
import numpy as np
2122

23+
from fink_science.ztf.hostless_detection.pipeline_utils import (
24+
apply_sigma_clipping,
25+
_check_hostless_conditions,
26+
crop_center_patch,
27+
)
28+
2229

2330
def read_cutout_stamp(fits_bytes: bytes) -> np.ndarray:
2431
"""
@@ -32,3 +39,69 @@ def read_cutout_stamp(fits_bytes: bytes) -> np.ndarray:
3239
fits_buffer = io.BytesIO(fits_bytes)
3340
with fits.open(fits_buffer) as hdulist:
3441
return hdulist[0].data
42+
43+
44+
def is_outliers_in_template(clipped_image, number_of_pixels: int = 20):
45+
"""Checks if is a big host pixels by applying sigma clipping
46+
47+
The threshold value is decided based on the samples we have seen.
48+
But should be updated eventually
49+
"""
50+
num_template_pixels_masked = np.ma.count_masked(clipped_image)
51+
print(num_template_pixels_masked)
52+
if num_template_pixels_masked > number_of_pixels:
53+
return True
54+
return False
55+
56+
57+
def run_hostless_detection_with_clipped_data(
58+
science_stamp: np.ndarray,
59+
template_stamp: np.ndarray,
60+
configs: Dict,
61+
check_outliers_in_template: bool = True,
62+
) -> bool:
63+
"""Detects potential hostless candidates
64+
65+
Notes
66+
-----
67+
We use sigma clipped stamp images by
68+
cropping an image patch from the center of the image.
69+
If pixels are rejected in scientific image but not in corresponding
70+
template image, such candidates are flagged as potential hostless
71+
72+
Parameters
73+
----------
74+
science_stamp
75+
science image
76+
template_stamp
77+
template image
78+
configs
79+
detection configs with detection threshold
80+
"""
81+
sigma_clipping_config = configs["sigma_clipping_kwargs"]
82+
83+
science_clipped = apply_sigma_clipping(science_stamp, sigma_clipping_config)
84+
template_clipped = apply_sigma_clipping(template_stamp, sigma_clipping_config)
85+
86+
if check_outliers_in_template:
87+
if is_outliers_in_template(template_clipped):
88+
return False
89+
detection_config = configs["hostless_detection_with_clipping"]
90+
91+
is_hostless_candidate = _check_hostless_conditions(
92+
science_clipped, template_clipped, detection_config
93+
)
94+
if is_hostless_candidate:
95+
return is_hostless_candidate
96+
97+
# Check again at half resolution
98+
crop_radius = int(detection_config["crop_radius"] / 2)
99+
science_stamp = crop_center_patch(science_stamp, crop_radius)
100+
template_stamp = crop_center_patch(template_stamp, crop_radius)
101+
102+
science_clipped = apply_sigma_clipping(science_stamp, sigma_clipping_config)
103+
template_clipped = apply_sigma_clipping(template_stamp, sigma_clipping_config)
104+
is_hostless_candidate = _check_hostless_conditions(
105+
science_clipped, template_clipped, detection_config
106+
)
107+
return is_hostless_candidate

fink_science/rubin/hostless_detection/processor.py

Lines changed: 84 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -32,10 +32,10 @@
3232

3333
# Share configuration with ZTF
3434
CONFIGS_BASE = load_json(
35-
"{}/ztf/hostless_detection/config_base.json".format(os.path.dirname(__file__))
35+
"{}/rubin/hostless_detection/config_base.json".format(os.path.dirname(__file__))
3636
)
3737
CONFIGS = load_json(
38-
"{}/ztf/hostless_detection/config.json".format(os.path.dirname(__file__))
38+
"{}/rubin/hostless_detection/config.json".format(os.path.dirname(__file__))
3939
)
4040
CONFIGS.update(CONFIGS_BASE)
4141

@@ -52,10 +52,16 @@ def run_potential_hostless(
5252
ssObjectId: pd.Series,
5353
nDiaSources: pd.Series,
5454
psfFlux: pd.Series,
55+
templateFlux: pd.Series,
56+
templateFluxErr: pd.Series,
5557
midpointMjdTai: pd.Series,
5658
firstDiaSourceMjdTaiFink: pd.Series,
5759
simbad_otype: pd.Series,
5860
gaiadr3_DR3Name: pd.Series,
61+
mangrove_2MASS_name: pd.Series,
62+
mangrove_HyperLEDA_name: pd.Series,
63+
legacydr8_zphot: pd.Series,
64+
spicy_class: pd.Series,
5965
) -> pd.Series:
6066
"""Runs potential hostless candidate detection for Rubin without any filtering
6167
@@ -71,6 +77,10 @@ def run_potential_hostless(
7177
Number of previous detections
7278
psfFlux: pd.Series
7379
Flux in the difference image
80+
templateFlux: pd.Series,
81+
Flux in template image
82+
templateFluxErr: pd.Series
83+
Flux error in template image
7484
midpointMjdTai: pd.Series
7585
Emission date for the alert
7686
firstDiaSourceMjdTaiFink: pd.Series
@@ -79,6 +89,15 @@ def run_potential_hostless(
7989
SIMBAD type. NaN if not existing
8090
gaiadr3_DR3Name: pd.Series
8191
Name in Gaia DR3. NaN if not existing
92+
mangrove_2MASS_name: pd.Series
93+
Name in mangrove 2MASS. NaN if not existing
94+
mangrove_HyperLEDA_name: pd.Series
95+
Name in mangrove HyperLEDA. NaN if not existing
96+
legacydr8_zphot: pd.Series
97+
Photo-z estimate from Legacy Surveys DR8 South Photometric Redshifts catalog.
98+
NaN if not existing
99+
spicy_class
100+
closest source from SPICY catalog. Nan if not existing
82101
83102
Notes
84103
-----
@@ -114,12 +133,18 @@ def run_potential_hostless(
114133
... F.lit(None),
115134
... F.lit(3),
116135
... F.lit(100000000),
136+
... F.lit(100000000),
137+
... F.lit(100),
117138
... df["diaSource.midpointMjdTai"],
118139
... df["diaSource.midpointMjdTai"],
119140
... F.lit(None),
141+
... F.lit(None),
142+
... F.lit(None),
143+
... F.lit(None),
144+
... F.lit(None),
120145
... F.lit(None),))
121146
>>> df.filter(df.elephant_kstest_no_cuts.kstest_science.isNotNull()).count()
122-
3
147+
0
123148
124149
# SSO cuts
125150
>>> df = df.withColumn('elephant_kstest_sso_cuts',
@@ -129,12 +154,18 @@ def run_potential_hostless(
129154
... df["ssSource.ssObjectId"],
130155
... F.lit(3),
131156
... F.lit(100000000),
157+
... F.lit(100000000),
158+
... F.lit(100),
132159
... df["diaSource.midpointMjdTai"],
133160
... df["diaSource.midpointMjdTai"],
134161
... F.lit(None),
162+
... F.lit(None),
163+
... F.lit(None),
164+
... F.lit(None),
165+
... F.lit(None),
135166
... F.lit(None),))
136167
>>> df.filter(df.elephant_kstest_sso_cuts.kstest_science.isNotNull()).count()
137-
1
168+
0
138169
139170
# All cuts
140171
>>> df = df.withColumn('elephant_kstest',
@@ -144,27 +175,69 @@ def run_potential_hostless(
144175
... df["ssSource.ssObjectId"],
145176
... df["diaObject.nDiaSources"],
146177
... df["diaSource.psfFlux"],
178+
... df["diaSource.templateFlux"],
179+
... df["diaSource.templateFluxErr"],
147180
... df["diaSource.midpointMjdTai"],
148181
... F.array_min("prvDiaSources.midpointMjdTai"),
149182
... F.lit("star"),
150-
... F.lit("DR3 toto"),))
183+
... F.lit("DR3 toto"),
184+
... F.lit("galaxy"),
185+
... F.lit("galaxy"),
186+
... F.lit("galaxy"),
187+
... F.lit("galaxy"),))
151188
>>> df.filter(df.elephant_kstest.kstest_science.isNotNull()).count()
152189
0
153190
"""
154-
f_min_point = nDiaSources >= 2 # N + 1
191+
f_min_point = nDiaSources >= CONFIGS["minimum_number_of_alerts"] - 1 # N + 1
155192
# FIXME: put the conversion formula in fink-utils
156-
f_bright = (-2.5 * np.log10(psfFlux) + ZP_NJY) < 20
193+
f_bright = (-2.5 * np.log10(psfFlux) + ZP_NJY) < CONFIGS["cutout_magnitude"]
157194
f_not_in_simbad = simbad_otype.apply(lambda val: val in BAD_VALUES or pd.isna(val))
158195
f_not_in_gaia = gaiadr3_DR3Name.apply(lambda val: val in BAD_VALUES or pd.isna(val))
196+
f_not_in_2mass = mangrove_2MASS_name.apply(
197+
lambda val: val in BAD_VALUES or pd.isna(val)
198+
)
199+
f_not_in_hyperlda = mangrove_HyperLEDA_name.apply(
200+
lambda val: val in BAD_VALUES or pd.isna(val)
201+
)
202+
f_not_in_legacydr8 = legacydr8_zphot.apply(
203+
lambda val: val in BAD_VALUES or pd.isna(val)
204+
)
205+
f_not_in_spicy = spicy_class.apply(lambda val: val in BAD_VALUES or pd.isna(val))
206+
159207
f_not_sso = ssObjectId.apply(lambda val: val in BAD_VALUES or pd.isna(val))
160-
f_early = (midpointMjdTai - firstDiaSourceMjdTaiFink) < 30
208+
f_early = (midpointMjdTai - firstDiaSourceMjdTaiFink) < CONFIGS["cutout_timeframe"]
161209

210+
# Cuts on psfFlux and templateFlux
211+
PSF_FLUX_THRESHOLD = 0
212+
TEMPLATE_FLUX_THRESHOLD = [300, 2000]
213+
psfFlux_cut = psfFlux > PSF_FLUX_THRESHOLD
214+
templateFlux_cut = (templateFlux > TEMPLATE_FLUX_THRESHOLD[0]) & (
215+
templateFlux < TEMPLATE_FLUX_THRESHOLD[1]
216+
)
217+
# SNR cut > 5
218+
template_snr_cut = (templateFlux / templateFluxErr) > 5
162219
good_candidate = (
163-
f_min_point & f_bright & f_not_in_simbad & f_not_in_gaia & f_not_sso & f_early
220+
f_min_point
221+
& f_bright
222+
& f_not_in_simbad
223+
& f_not_in_gaia
224+
& f_not_sso
225+
& f_early
226+
& psfFlux_cut
227+
& templateFlux_cut
228+
& template_snr_cut
229+
& f_not_in_2mass
230+
& f_not_in_hyperlda
231+
& f_not_in_legacydr8
232+
& f_not_in_spicy
164233
)
165-
166-
default_result = {"kstest_science": None, "kstest_template": None}
234+
# Process full image
235+
default_result = {
236+
"kstest_science": None,
237+
"kstest_template": None,
238+
}
167239
kstest_results = []
240+
168241
hostless_science_class = HostLessExtragalacticRubin(CONFIGS_BASE)
169242
for index in range(cutoutScience.shape[0]):
170243
if good_candidate[index]:

fink_science/rubin/hostless_detection/run_pipeline.py

Lines changed: 25 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -14,14 +14,17 @@
1414
# limitations under the License.
1515
"""Implementation of the paper: ELEPHANT: ExtragaLactic alErt Pipeline for Hostless AstroNomical Transients https://arxiv.org/abs/2404.18165"""
1616

17-
from typing import Tuple
17+
from typing import Union, Any
1818

1919
from fink_science.ztf.hostless_detection.pipeline_utils import (
20-
run_hostless_detection_with_clipped_data,
2120
run_powerspectrum_analysis,
21+
crop_center_patch,
2222
)
2323
from fink_science.ztf.hostless_detection.run_pipeline import HostLessExtragalactic
24-
from fink_science.rubin.hostless_detection.pipeline_utils import read_cutout_stamp
24+
from fink_science.rubin.hostless_detection.pipeline_utils import (
25+
read_cutout_stamp,
26+
run_hostless_detection_with_clipped_data,
27+
)
2528

2629

2730
class HostLessExtragalacticRubin(HostLessExtragalactic):
@@ -34,7 +37,7 @@ class HostLessExtragalacticRubin(HostLessExtragalactic):
3437

3538
def process_candidate_fink_rubin(
3639
self, science_stamp: bytes, template_stamp: bytes
37-
) -> Tuple[float, float]:
40+
) -> Union[tuple[None, None], tuple[Any, Any]]:
3841
"""
3942
Processes each candidate
4043
@@ -46,27 +49,37 @@ def process_candidate_fink_rubin(
4649
template stamp data
4750
"""
4851
science_stamp = read_cutout_stamp(science_stamp)
49-
5052
template_stamp = read_cutout_stamp(template_stamp)
53+
crop_radius = self.configs["hostless_detection_with_clipping"]["crop_radius"]
54+
science_stamp = crop_center_patch(science_stamp, crop_radius)
55+
template_stamp = crop_center_patch(template_stamp, crop_radius)
5156

5257
if science_stamp.shape != template_stamp.shape:
5358
return None, None
5459

55-
science_stamp_clipped, template_stamp_clipped = self._run_sigma_clipping(
56-
science_stamp, template_stamp
57-
)
60+
if (science_stamp.shape[0] < self._image_shape[0]) or (
61+
template_stamp.shape[0] < self._image_shape[0]
62+
):
63+
return None, None
64+
65+
science_stamp = crop_center_patch(science_stamp, crop_radius)
66+
template_stamp = crop_center_patch(template_stamp, crop_radius)
67+
5868
is_hostless_candidate = run_hostless_detection_with_clipped_data(
59-
science_stamp_clipped, template_stamp_clipped, self.configs
69+
science_stamp, template_stamp, self.configs
6070
)
6171
if is_hostless_candidate:
72+
science_stamp_clipped, template_stamp_clipped = self._run_sigma_clipping(
73+
science_stamp, template_stamp
74+
)
6275
power_spectrum_results = run_powerspectrum_analysis(
6376
science_stamp,
6477
template_stamp,
6578
science_stamp_clipped.mask.astype(int),
6679
template_stamp_clipped.mask.astype(int),
67-
self._image_shape,
80+
crop_radius=crop_radius,
6881
)
6982
return power_spectrum_results[
70-
"kstest_SCIENCE_15_statistic"
71-
], power_spectrum_results["kstest_TEMPLATE_15_statistic"]
83+
"kstest_SCIENCE_statistic"
84+
], power_spectrum_results["kstest_TEMPLATE_statistic"]
7285
return None, None

fink_science/ztf/hostless_detection/pipeline_utils.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -150,12 +150,14 @@ def run_hostless_detection_with_clipped_data(
150150

151151
science_clipped = apply_sigma_clipping(science_stamp, sigma_clipping_config)
152152
template_clipped = apply_sigma_clipping(template_stamp, sigma_clipping_config)
153+
153154
detection_config = configs["hostless_detection_with_clipping"]
154155
is_hostless_candidate = _check_hostless_conditions(
155156
science_clipped, template_clipped, detection_config
156157
)
157158
if is_hostless_candidate:
158159
return is_hostless_candidate
160+
159161
science_stamp = crop_center_patch(science_stamp, detection_config["crop_radius"])
160162
template_stamp = crop_center_patch(template_stamp, detection_config["crop_radius"])
161163
science_clipped = apply_sigma_clipping(science_stamp, sigma_clipping_config)
@@ -195,8 +197,8 @@ def run_powerspectrum_analysis(
195197
template_image: np.ndarray,
196198
science_mask: np.ndarray,
197199
template_mask: np.ndarray,
198-
image_size: List,
199200
number_of_iterations: int = 200,
201+
crop_radius: int = 15,
200202
) -> Dict:
201203
"""Runs powerspectrum analysis
202204
@@ -232,5 +234,6 @@ def run_powerspectrum_analysis(
232234
template_data,
233235
number_of_iterations=number_of_iterations,
234236
metric="kstest",
237+
cutout_size=crop_radius * 2,
235238
)
236239
return kstest_results_dict

0 commit comments

Comments
 (0)