nasa · hhlee445 · Apr 2, 2026 · Mar 27, 2026 · Mar 27, 2026 · Mar 30, 2026
diff --git a/cluster_provisioning/modules/common/variables.tf b/cluster_provisioning/modules/common/variables.tf
@@ -666,6 +666,54 @@ variable "queues" {
       "use_private_vpc"   = false
       "use_on_demand"     = false
     }
+    "opera-job_worker-rtc_for_dist_data_query_hist" = {
+      "name"              = "opera-job_worker-rtc_for_dist_data_query_hist"
+      "instance_type"     = ["m8a.large", "m8i-flex.large", "m8i.large", "m7a.large", "m7i-flex.large", "m6i.large", "m6a.large", "m5.large", "m5a.large"]
+      "user_data"         = "launch_template_user_data.sh.tmpl"
+      "root_dev_size"     = 50
+      "data_dev_size"     = 25
+      "min_size"          = 0
+      "max_size"          = 100
+      "total_jobs_metric" = false
+      "use_private_vpc"   = false
+      "use_on_demand"     = true
+    }
+    "opera-job_worker-dist_s1_hist_on_first" = {
+      "name"              = "opera-job_worker-dist_s1_hist_on_first"
+      "instance_type"     = ["m8a.large", "m8i-flex.large", "m8i.large", "m7a.large", "m7i-flex.large", "m6i.large", "m6a.large", "m5.large", "m5a.large"]
+      "user_data"         = "launch_template_user_data.sh.tmpl"
+      "root_dev_size"     = 50
+      "data_dev_size"     = 25
+      "min_size"          = 0
+      "max_size"          = 100
+      "total_jobs_metric" = false
+      "use_private_vpc"   = false
+      "use_on_demand"     = true
+    }
+    "opera-job_worker-dist_s1_hist_on_publication" = {
+      "name"              = "opera-job_worker-dist_s1_hist_on_publication"
+      "instance_type"     = ["m8a.large", "m8i-flex.large", "m8i.large", "m7a.large", "m7i-flex.large", "m6i.large", "m6a.large", "m5.large", "m5a.large"]
+      "user_data"         = "launch_template_user_data.sh.tmpl"
+      "root_dev_size"     = 50
+      "data_dev_size"     = 25
+      "min_size"          = 0
+      "max_size"          = 100
+      "total_jobs_metric" = false
+      "use_private_vpc"   = false
+      "use_on_demand"     = true
+    }
+    "opera-job_worker-dist_s1_hist_on_complete" = {
+      "name"              = "opera-job_worker-dist_s1_hist_on_complete"
+      "instance_type"     = ["m8a.large", "m8i-flex.large", "m8i.large", "m7a.large", "m7i-flex.large", "m6i.large", "m6a.large", "m5.large", "m5a.large"]
+      "user_data"         = "launch_template_user_data.sh.tmpl"
+      "root_dev_size"     = 50
+      "data_dev_size"     = 25
+      "min_size"          = 0
+      "max_size"          = 100
+      "total_jobs_metric" = false
+      "use_private_vpc"   = false
+      "use_on_demand"     = true
+    }
     "opera-job_worker-cslc_data_query" = {
       "name"              = "opera-job_worker-cslc_data_query"
       "instance_type"     = ["c6i.xlarge", "m6a.xlarge", "c6a.xlarge", "c5a.xlarge", "r7i.xlarge", "c7i.xlarge"]
@@ -786,6 +834,18 @@ variable "queues" {
       "use_private_vpc"   = false
       "use_on_demand"     = false
     }
+    "opera-job_worker-rtc_for_dist_data_download_hist" = {
+      "name"              = "opera-job_worker-rtc_for_dist_data_download_hist"
+      "instance_type"     = ["c6in.large", "c5n.large", "m6in.large", "m5n.large"]
+      "user_data"         = "launch_template_user_data.sh.tmpl"
+      "root_dev_size"     = 50
+      "data_dev_size"     = 25
+      "min_size"          = 0
+      "max_size"          = 50
+      "total_jobs_metric" = true
+      "use_private_vpc"   = false
+      "use_on_demand"     = false
+    }
     "opera-job_worker-ecmwf-merger" = {
       "name"              = "opera-job_worker-ecmwf-merger"
       "instance_type"     = ["r5a.4xlarge", "r6a.4xlarge", "r5.4xlarge", "r6i.4xlarge", "r7i.4xlarge", "r7a.4xlarge", "r6a.2xlarge"]

diff --git a/conf/sds/files/datasets.json b/conf/sds/files/datasets.json
@@ -511,6 +511,14 @@
           "s3://{{ DATASET_S3_ENDPOINT }}:80/{{ DATASET_BUCKET }}/browse/inputs/GSLC_NI/{id}"
         ]
       }
+    },
+    {
+      "ipath": "hysds::data/DIST_S1-state-config",
+      "level": "STATE-CONFIG",
+      "type": "DIST_S1-STATE-CONFIG",
+      "match_pattern": "/(?P<id>DIST_S1_state-config_.+)$",
+      "alt_match_pattern": null,
+      "extractor": null
     }
   ]
 }
diff --git a/conf/sds/rules/user_rules.json b/conf/sds/rules/user_rules.json
@@ -111,6 +111,48 @@
       "username": "hysdsops",
       "workflow": "hysds-io-SCIFLO_L3_DSWx_HLS:__TAG__",
       "job_spec": "job-SCIFLO_L3_DSWx_HLS:__TAG__"
+    },
+    {
+      "enabled": true,
+      "job_type": "hysds-io-rtc_for_dist_query_dist_on_pub:__TAG__",
+      "kwargs": "{}",
+      "passthru_query": false,
+      "priority": 0,
+      "query_all": false,
+      "query_string": "{\"bool\": {\"must\": [{\"term\": {\"dataset_type.keyword\": \"L3_DIST_S1\"}}], \"must_not\": [{\"term\": {\"metadata.restaged\": \"true\"}}]}}",
+      "queue": "opera-job_worker-dist_s1_hist_on_publication",
+      "rule_name": "trigger-rtc_for_dist_query_dist_on_pub",
+      "username": "hysdsops",
+      "workflow": "hysds-io-rtc_for_dist_query_dist_on_pub:__TAG__",
+      "job_spec": "job-rtc_for_dist_query_dist_on_pub:__TAG__"
+    },
+    {
+      "enabled": true,
+      "job_type": "hysds-io-rtc_for_dist_query_sc_on_first:__TAG__",
+      "kwargs": "{}",
+      "passthru_query": false,
+      "priority": 0,
+      "query_all": false,
+      "query_string": "{\"bool\": {\"must\": [{\"term\": {\"dataset_type.keyword\": \"DIST_S1-STATE-CONFIG\"}}, {\"term\": {\"metadata.first\": true}}],\"must_not\": [{\"term\": {\"metadata.is_complete\": true}}]}}",
+      "queue": "opera-job_worker-dist_s1_hist_on_first",
+      "rule_name": "trigger-rtc_for_dist_query_sc_on_first",
+      "username": "hysdsops",
+      "workflow": "hysds-io-rtc_for_dist_query_sc_on_first:__TAG__",
+      "job_spec": "job-rtc_for_dist_query_sc_on_first:__TAG__"
+    },
+    {
+      "enabled": true,
+      "job_type": "hysds-io-rtc_for_dist_query_sc_on_complete:__TAG__",
+      "kwargs": "{}",
+      "passthru_query": false,
+      "priority": 0,
+      "query_all": false,
+      "query_string": "{\"bool\": {\"must\": [{\"term\": {\"dataset_type.keyword\": \"DIST_S1-STATE-CONFIG\"}}, {\"term\": {\"metadata.is_complete\": true}}]}}",
+      "queue": "opera-job_worker-dist_s1_hist_on_complete",
+      "rule_name": "trigger-rtc_for_dist_query_sc_on_complete",
+      "username": "hysdsops",
+      "workflow": "hysds-io-rtc_for_dist_query_sc_on_complete:__TAG__",
+      "job_spec": "job-rtc_for_dist_query_sc_on_complete:__TAG__"
     }
   ],
   "mozart": []

diff --git a/data_subscriber/cslc_utils.py b/data_subscriber/cslc_utils.py
@@ -242,17 +242,21 @@ def process_frame_geo_json(file):
     return frame_geo_map
 
 def parse_r2_product_file_name(native_id, product_type):
+    match_product_id = _datasets_json_match(product_type, native_id)  # raises ValueError when native_id does not match the pattern
+    burst_id = match_product_id.group("burst_id")  # e.g. T074-157286-IW3 (for RTC and CSLC)
+    acquisition_dts = match_product_id.group("acquisition_ts")  # e.g. 20210705T183117Z
+    return burst_id, acquisition_dts  # values of the regex's named groups, returned unparsed
 
+# TODO(chrisjrd): move _datasets_json_match to dataset_util.py or similar
+def _datasets_json_match(product_type, native_id):  # looks up product_type's "match_pattern" via DatasetsJson and re.match()es it against native_id
     dataset_json = datasets_json_util.DatasetsJson()
     cslc_granule_regex = dataset_json.get(product_type)["match_pattern"]
     match_product_id = re.match(cslc_granule_regex, native_id)
 
     if not match_product_id:
         raise ValueError(f"{product_type} native ID {native_id} could not be parsed with regex from datasets.json")
+    return match_product_id  # the re.Match object; a failed match raised above instead of returning None
 
-    burst_id = match_product_id.group("burst_id")  # e.g. T074-157286-IW3 (for RTC and CSLC)
-    acquisition_dts = match_product_id.group("acquisition_ts")  # e.g. 20210705T183117Z
-    return burst_id, acquisition_dts
 
 def parse_cslc_file_name(native_id):
     return parse_r2_product_file_name(native_id, "L2_CSLC_S1")

diff --git a/data_subscriber/daac_data_subscriber.py b/data_subscriber/daac_data_subscriber.py
@@ -56,7 +56,7 @@ def run(argv: list[str]):
 
     es_conn = supply_es_conn(args)
 
-    logger.debug(f"daac_data_subscriber.py invoked with {args=}")
+    logger.info(f"daac_data_subscriber.py invoked with {args=}")
 
     job_id = supply_job_id()
     logger.debug(f"Using {job_id=}")

diff --git a/data_subscriber/dist_s1_utils.py b/data_subscriber/dist_s1_utils.py
@@ -87,21 +87,21 @@ def process_dist_burst_db(file):
     logger.info(f"Processing {df.shape[0]} rows in the DIST-S1 burst database file...")
 
     # Create a dictionary of tile ids and the products that are associated with them
-    for index, row in df.iterrows():
+    for row in df.itertuples():
         #print(row['mgrs_tile_id'], row['acq_group_id_within_mgrs_tile'])
-        tile_id = row['mgrs_tile_id']
-        unique_acquisition = row['acq_group_id_within_mgrs_tile']
+        tile_id = row.mgrs_tile_id
+        unique_acquisition = row.acq_group_id_within_mgrs_tile
         product_id = tile_id + "_" + str(unique_acquisition)
         if product_id not in dist_products[tile_id]:
             dist_products[tile_id].add(product_id)
 
-        jpl_burst_id = row['jpl_burst_id']
+        jpl_burst_id = row.jpl_burst_id
         bursts_to_products[jpl_burst_id].add(product_id)
         product_to_bursts[product_id].add(jpl_burst_id)
 
         if jpl_burst_id in all_burst_ids:
             rtc_bursts_reused += 1
-        all_burst_ids.add(row['jpl_burst_id'])
+        all_burst_ids.add(row.jpl_burst_id)
 
     print(f"Total of {len(all_burst_ids)} unique RTC bursts in this database file.")
     print(f"RTC Bursts were reused {rtc_bursts_reused} times in this database file.")