|
19 | 19 | import os |
20 | 20 | import logging |
21 | 21 |
|
| 22 | +import numpy as np |
22 | 23 | from fink_utils.spark.utils import concat_col |
23 | 24 |
|
24 | 25 | from fink_broker.common.tester import spark_unit_tests |
@@ -437,33 +438,41 @@ def apply_science_modules(df: DataFrame, tns_raw_output: str = "") -> DataFrame: |
437 | 438 | "candidate.jd", |
438 | 439 | "candidate.jdstarthist", |
439 | 440 | "rf_kn_vs_nonkn", |
440 | | - "tracklet", |
| 441 | + F.lit(""), |
441 | 442 | ] |
442 | 443 | df = df.withColumn("finkclass", extract_fink_classification(*fink_classifier_cols)) |
443 | | - df = df.withColumn("tnsclass", F.lit("Unknown")) |
444 | 444 | df = df.withColumn( |
445 | | - "elephant_kstest", |
446 | | - F.slice( |
447 | | - run_potential_hostless( |
448 | | - df["cmagpsf"], |
449 | | - df["cutoutScience.stampData"], |
450 | | - df["cutoutTemplate.stampData"], |
451 | | - df["snn_snia_vs_nonia"], |
452 | | - df["snn_sn_vs_all"], |
453 | | - df["rf_snia_vs_nonia"], |
454 | | - df["rf_kn_vs_nonkn"], |
455 | | - df["finkclass"], |
456 | | - df["tnsclass"], |
457 | | - df["candidate.jd"] - df["candidate.jdstarthist"], |
458 | | - df["roid"], |
459 | | - ), |
460 | | - 1, |
461 | | - 2, |
| 445 | + "kstest_static", |
| 446 | + run_potential_hostless( |
| 447 | + df["cmagpsf"], |
| 448 | + df["cutoutScience.stampData"], |
| 449 | + df["cutoutTemplate.stampData"], |
| 450 | + df["snn_snia_vs_nonia"], |
| 451 | + df["snn_sn_vs_all"], |
| 452 | + df["rf_snia_vs_nonia"], |
| 453 | + df["rf_kn_vs_nonkn"], |
| 454 | + df["finkclass"], |
| 455 | + df["tns"], |
| 456 | + df["candidate.jd"] - df["candidate.jdstarthist"], |
| 457 | + df["roid"], |
462 | 458 | ), |
463 | 459 | ) |
464 | | - expanded.extend(["finkclass", "tnsclass"]) |
465 | | - df = df.drop(*expanded) |
| 460 | + cond_science_low = df["kstest_static"][0] >= 0.0 |
| 461 | + cond_science_high = df["kstest_static"][0] <= 0.5 |
| 462 | + cond_template_low = df["kstest_static"][1] >= 0.0 |
| 463 | + cond_template_high = df["kstest_static"][1] <= 0.85 |
| 464 | + cond_max_detections = F.size(F.array_remove("cmagpsf", np.nan)) <= 20 |
| 465 | + |
| 466 | + df = df.withColumn( |
| 467 | + "is_hostless", |
| 468 | + cond_science_low |
| 469 | + & cond_science_high |
| 470 | + & cond_template_low |
| 471 | + & cond_template_high |
| 472 | + & cond_max_detections, |
| 473 | + ) |
466 | 474 |
|
| 475 | + expanded.extend(["kstest_static"]) |
467 | 476 | # Drop temp columns |
468 | 477 | df = df.drop(*expanded) |
469 | 478 |
|
|
0 commit comments