Skip to content

Commit 9d62bb2

Browse files
committed
feat(DIST-S1): DIST-S1 historical state-config
1 parent 0d4fde4 commit 9d62bb2

27 files changed: 1173 additions and 197 deletions.

conf/sds/files/datasets.json

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -511,6 +511,14 @@
511511
"s3://{{ DATASET_S3_ENDPOINT }}:80/{{ DATASET_BUCKET }}/browse/inputs/GSLC_NI/{id}"
512512
]
513513
}
514+
},
515+
{
516+
"ipath": "hysds::data/DIST_S1-state-config",
517+
"level": "STATE-CONFIG",
518+
"type": "DIST_S1-STATE-CONFIG",
519+
"match_pattern": "/(?P<id>DIST_S1_state-config_.+)$",
520+
"alt_match_pattern": null,
521+
"extractor": null
514522
}
515523
]
516524
}

conf/sds/rules/user_rules.json

Lines changed: 42 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -111,6 +111,48 @@
111111
"username": "hysdsops",
112112
"workflow": "hysds-io-SCIFLO_L3_DSWx_HLS:__TAG__",
113113
"job_spec": "job-SCIFLO_L3_DSWx_HLS:__TAG__"
114+
},
115+
{
116+
"enabled": true,
117+
"job_type": "hysds-io-rtc_for_dist_query_dist_on_pub:__TAG__",
118+
"kwargs": "{}",
119+
"passthru_query": false,
120+
"priority": 0,
121+
"query_all": false,
122+
"query_string": "{\"bool\": {\"must\": [{\"term\": {\"dataset_type.keyword\": \"L3_DIST_S1\"}}], \"must_not\": [{\"term\": {\"metadata.restaged\": \"true\"}}]}}",
123+
"queue": "factotum-job_worker-small",
124+
"rule_name": "trigger-rtc_for_dist_query_dist_on_pub",
125+
"username": "hysdsops",
126+
"workflow": "hysds-io-rtc_for_dist_query_dist_on_pub:__TAG__",
127+
"job_spec": "job-rtc_for_dist_query_dist_on_pub:__TAG__"
128+
},
129+
{
130+
"enabled": true,
131+
"job_type": "hysds-io-rtc_for_dist_query_sc_on_first:__TAG__",
132+
"kwargs": "{}",
133+
"passthru_query": false,
134+
"priority": 0,
135+
"query_all": false,
136+
"query_string": "{\"bool\": {\"must\": [{\"term\": {\"dataset_type.keyword\": \"DIST_S1-STATE-CONFIG\"}}, {\"term\": {\"metadata.first\": true}}]}}",
137+
"queue": "factotum-job_worker-small",
138+
"rule_name": "trigger-rtc_for_dist_query_sc_on_first",
139+
"username": "hysdsops",
140+
"workflow": "hysds-io-rtc_for_dist_query_sc_on_first:__TAG__",
141+
"job_spec": "job-rtc_for_dist_query_sc_on_first:__TAG__"
142+
},
143+
{
144+
"enabled": true,
145+
"job_type": "hysds-io-rtc_for_dist_query_sc_on_complete:__TAG__",
146+
"kwargs": "{}",
147+
"passthru_query": false,
148+
"priority": 0,
149+
"query_all": false,
150+
"query_string": "{\"bool\": {\"must\": [{\"term\": {\"dataset_type.keyword\": \"DIST_S1-STATE-CONFIG\"}}, {\"term\": {\"metadata.is_complete\": true}}]}}",
151+
"queue": "factotum-job_worker-small",
152+
"rule_name": "trigger-rtc_for_dist_query_sc_on_complete",
153+
"username": "hysdsops",
154+
"workflow": "hysds-io-rtc_for_dist_query_sc_on_complete:__TAG__",
155+
"job_spec": "job-rtc_for_dist_query_sc_on_complete:__TAG__"
114156
}
115157
],
116158
"mozart": []

data_subscriber/cslc_utils.py

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -215,17 +215,21 @@ def process_frame_geo_json(file):
215215
return frame_geo_map
216216

217217
def parse_r2_product_file_name(native_id, product_type):
218+
match_product_id = _datasets_json_match(product_type, native_id)
219+
burst_id = match_product_id.group("burst_id") # e.g. T074-157286-IW3 (for RTC and CSLC)
220+
acquisition_dts = match_product_id.group("acquisition_ts") # e.g. 20210705T183117Z
221+
return burst_id, acquisition_dts
218222

223+
# TODO chrisjrd: move to dataset_util.py or similar
224+
def _datasets_json_match(product_type, native_id):
219225
dataset_json = datasets_json_util.DatasetsJson()
220226
cslc_granule_regex = dataset_json.get(product_type)["match_pattern"]
221227
match_product_id = re.match(cslc_granule_regex, native_id)
222228

223229
if not match_product_id:
224230
raise ValueError(f"{product_type} native ID {native_id} could not be parsed with regex from datasets.json")
231+
return match_product_id
225232

226-
burst_id = match_product_id.group("burst_id") # e.g. T074-157286-IW3 (for RTC and CSLC)
227-
acquisition_dts = match_product_id.group("acquisition_ts") # e.g. 20210705T183117Z
228-
return burst_id, acquisition_dts
229233

230234
def parse_cslc_file_name(native_id):
231235
return parse_r2_product_file_name(native_id, "L2_CSLC_S1")

data_subscriber/daac_data_subscriber.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -55,7 +55,7 @@ def run(argv: list[str]):
5555

5656
es_conn = supply_es_conn(args)
5757

58-
logger.debug(f"daac_data_subscriber.py invoked with {args=}")
58+
logger.info(f"daac_data_subscriber.py invoked with {args=}")
5959

6060
job_id = supply_job_id()
6161
logger.debug(f"Using {job_id=}")

data_subscriber/dist_s1_utils.py

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -87,21 +87,21 @@ def process_dist_burst_db(file):
8787
logger.info(f"Processing {df.shape[0]} rows in the DIST-S1 burst database file...")
8888

8989
# Create a dictionary of tile ids and the products that are associated with them
90-
for index, row in df.iterrows():
90+
for row in df.itertuples():
9191
#print(row['mgrs_tile_id'], row['acq_group_id_within_mgrs_tile'])
92-
tile_id = row['mgrs_tile_id']
93-
unique_acquisition = row['acq_group_id_within_mgrs_tile']
92+
tile_id = row.mgrs_tile_id
93+
unique_acquisition = row.acq_group_id_within_mgrs_tile
9494
product_id = tile_id + "_" + str(unique_acquisition)
9595
if product_id not in dist_products[tile_id]:
9696
dist_products[tile_id].add(product_id)
9797

98-
jpl_burst_id = row['jpl_burst_id']
98+
jpl_burst_id = row.jpl_burst_id
9999
bursts_to_products[jpl_burst_id].add(product_id)
100100
product_to_bursts[product_id].add(jpl_burst_id)
101101

102102
if jpl_burst_id in all_burst_ids:
103103
rtc_bursts_reused += 1
104-
all_burst_ids.add(row['jpl_burst_id'])
104+
all_burst_ids.add(row.jpl_burst_id)
105105

106106
print(f"Total of {len(all_burst_ids)} unique RTC bursts in this database file.")
107107
print(f"RTC Bursts were reused {rtc_bursts_reused} times in this database file.")

0 commit comments

Comments
 (0)