Skip to content

Commit dbe8841

Browse files
committed
Add is_hostless and finkclass columns in the database ingestion script. Use the already available tns column.
1 parent 1da4301 commit dbe8841

File tree

2 files changed

+32
-21
lines changed

2 files changed

+32
-21
lines changed

fink_broker/ztf/hbase_utils.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,8 @@ def load_fink_cols():
8383
"gaiaClass": {"type": "string", "default": "Unknown"},
8484
"is_transient": {"type": "boolean", "default": False},
8585
"slsn_score": {"type": "float", "default": -1},
86+
"finkclass": {"type": "float", "default": "Unknown"},
87+
"is_hostless": {"type": "boolean", "default": False},
8688
}
8789

8890
fink_nested_cols = {}

fink_broker/ztf/science.py

Lines changed: 30 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
import os
2020
import logging
2121

22+
import numpy as np
2223
from fink_utils.spark.utils import concat_col
2324

2425
from fink_broker.common.tester import spark_unit_tests
@@ -437,33 +438,41 @@ def apply_science_modules(df: DataFrame, tns_raw_output: str = "") -> DataFrame:
437438
"candidate.jd",
438439
"candidate.jdstarthist",
439440
"rf_kn_vs_nonkn",
440-
"tracklet",
441+
F.lit(""),
441442
]
442443
df = df.withColumn("finkclass", extract_fink_classification(*fink_classifier_cols))
443-
df = df.withColumn("tnsclass", F.lit("Unknown"))
444444
df = df.withColumn(
445-
"elephant_kstest",
446-
F.slice(
447-
run_potential_hostless(
448-
df["cmagpsf"],
449-
df["cutoutScience.stampData"],
450-
df["cutoutTemplate.stampData"],
451-
df["snn_snia_vs_nonia"],
452-
df["snn_sn_vs_all"],
453-
df["rf_snia_vs_nonia"],
454-
df["rf_kn_vs_nonkn"],
455-
df["finkclass"],
456-
df["tnsclass"],
457-
df["candidate.jd"] - df["candidate.jdstarthist"],
458-
df["roid"],
459-
),
460-
1,
461-
2,
445+
"kstest_static",
446+
run_potential_hostless(
447+
df["cmagpsf"],
448+
df["cutoutScience.stampData"],
449+
df["cutoutTemplate.stampData"],
450+
df["snn_snia_vs_nonia"],
451+
df["snn_sn_vs_all"],
452+
df["rf_snia_vs_nonia"],
453+
df["rf_kn_vs_nonkn"],
454+
df["finkclass"],
455+
df["tns"],
456+
df["candidate.jd"] - df["candidate.jdstarthist"],
457+
df["roid"],
462458
),
463459
)
464-
expanded.extend(["finkclass", "tnsclass"])
465-
df = df.drop(*expanded)
460+
cond_science_low = df["kstest_static"][0] >= 0.0
461+
cond_science_high = df["kstest_static"][0] <= 0.5
462+
cond_template_low = df["kstest_static"][1] >= 0.0
463+
cond_template_high = df["kstest_static"][1] <= 0.85
464+
cond_max_detections = F.size(F.array_remove("cmagpsf", np.nan)) <= 20
465+
466+
df = df.withColumn(
467+
"is_hostless",
468+
cond_science_low
469+
& cond_science_high
470+
& cond_template_low
471+
& cond_template_high
472+
& cond_max_detections,
473+
)
466474

475+
expanded.extend(["kstest_static"])
467476
# Drop temp columns
468477
df = df.drop(*expanded)
469478

0 commit comments

Comments
 (0)