Skip to content

Commit 1e18727

Browse files
feat(opensearch): FTRS-856 Index creation and population
1 parent 8b67aa5 commit 1e18727

File tree

3 files changed

+64
-15
lines changed

3 files changed

+64
-15
lines changed

scripts/workflow/populate_open_search_index.py

Lines changed: 3 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -53,15 +53,11 @@
5353
"primary_key_template": PRIMARY_KEY_TEMPLATE,
5454
"doc_id_fields": DOC_ID_FIELDS,
5555
"top_level": {
56-
"primary_key": ["primary_key"]
56+
"primary_key": ["primary_key", "id"]
5757
},
5858
"nested": {
5959
NESTED_COLLECTION_FIELD: {
60-
"source_attributes": [
61-
"symptomGroupSymptomDiscriminators",
62-
"symptomGroupSymptomDiscriminator",
63-
"symptomGroup_symptomDiscriminators"
64-
],
60+
"source_attributes": ["symptomGroupSymptomDiscriminators"],
6561
"items": {
6662
"sg": "sg",
6763
"sd": "sd"
@@ -484,7 +480,7 @@ def main(argv: Optional[list[str]] = None) -> int:
484480

485481
try:
486482
log.info('Scanning DynamoDB table...')
487-
raw_items = scan_dynamodb_table(prepare_dynamodb_client(aws_region), final_table, ['primary_key', 'symptomGroupSymptomDiscriminators', 'symptomGroupSymptomDiscriminator', 'symptomGroup_symptomDiscriminators'])
483+
raw_items = scan_dynamodb_table(prepare_dynamodb_client(aws_region), final_table, ['id', 'primary_key', 'symptomGroupSymptomDiscriminators'])
488484
log.info('Transforming records...')
489485
transformed = transform_records(raw_items, schema_config)
490486
if transformed:

services/dos-search/tests/unit/open_search_index/test_open_search.py

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -406,11 +406,9 @@ def test_transform_records_missing_nested_logs(
406406
else "primary_key",
407407
mod.DEFAULT_SCHEMA_CONFIG,
408408
)
409-
# construct a raw record with primary_key present but no nested source attributes
410-
raw_items = [{"primary_key": {"S": "p1"}}]
409+
raw_items = [{"id": {"S": "p1"}}]
411410
caplog.set_level("DEBUG")
412411
out = mod.transform_records(raw_items, None)
413-
# expect a result list and debug may have noted missing nested
414412
assert isinstance(out, list)
415413

416414

@@ -433,7 +431,7 @@ def test_main_success_full_flow(create_populate_module: Any) -> None:
433431
),
434432
patch(
435433
"populate_open_search_index.scan_dynamodb_table",
436-
return_value=[{"primary_key": {"S": "1"}}],
434+
return_value=[{"id": {"S": "1"}}],
437435
),
438436
patch(
439437
"populate_open_search_index.transform_records",

services/dos-search/tests/unit/open_search_index/test_populate_index.py

Lines changed: 59 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -260,13 +260,11 @@ def test_transform_records_template_keyerror_handled(
260260
create_populate_module: Any,
261261
) -> None:
262262
mod = create_populate_module
263-
raw_items = [
264-
{"primary_key": {"S": "pk1"}, "symptomGroupSymptomDiscriminators": {"L": []}}
265-
]
263+
raw_items = [{"id": {"S": "pk1"}, "symptomGroupSymptomDiscriminators": {"L": []}}]
266264
schema = {
267265
"primary_key_template": "{primary_key}-{missing}",
268266
"doc_id_fields": ["primary_key"],
269-
"top_level": {"primary_key": ["primary_key"]},
267+
"top_level": {"primary_key": ["primary_key", "id"]},
270268
"nested": {},
271269
}
272270
out = mod.transform_records(raw_items, schema)
@@ -475,3 +473,60 @@ def test_index_records_chunk_partial_failure_logs(
475473
assert total == 4
476474
assert success == 0
477475
assert any("Bulk chunk had" in r.message for r in caplog.records)
476+
477+
478+
def test_main_passes_projection_to_scan(create_populate_module: Any) -> None:
479+
mod = create_populate_module
480+
called = {}
481+
482+
def fake_scan(_client, table, attrs):
483+
called["table"] = table
484+
called["attrs"] = attrs
485+
return []
486+
487+
with (
488+
patch(
489+
"populate_open_search_index.prepare_dynamodb_client",
490+
return_value=MagicMock(),
491+
),
492+
patch("populate_open_search_index.scan_dynamodb_table", side_effect=fake_scan),
493+
patch(
494+
"populate_open_search_index.SignedRequestsSession", return_value=MagicMock()
495+
),
496+
):
497+
rc = mod.main(["--endpoint", "https://example", "--final-index", "triage_code"])
498+
assert rc == 0
499+
500+
assert "attrs" in called
501+
assert "id" in called["attrs"]
502+
assert "symptomGroupSymptomDiscriminators" in called["attrs"]
503+
504+
505+
def test_transform_full_item_maps_id_and_nested(create_populate_module: Any) -> None:
506+
"""Use a representative DynamoDB item and verify transform_records produces the expected document shape."""
507+
mod = create_populate_module
508+
sample = {
509+
"id": {"S": "6f3d7dd4-e50b-5d8d-be2b-455f091b4df2"},
510+
"field": {"S": "document"},
511+
"active": {"BOOL": True},
512+
"symptomGroupSymptomDiscriminators": {
513+
"L": [
514+
{"M": {"sd": {"N": "4052"}, "sg": {"N": "1006"}}},
515+
{"M": {"sd": {"N": "4052"}, "sg": {"N": "1004"}}},
516+
]
517+
},
518+
}
519+
520+
out = mod.transform_records([sample])
521+
assert isinstance(out, list)
522+
assert len(out) == 1
523+
doc = out[0]
524+
# primary_key should be built from id alias
525+
assert "primary_key" in doc
526+
assert doc["primary_key"] == "6f3d7dd4-e50b-5d8d-be2b-455f091b4df2"
527+
# nested field name determined by mapping; expect list with two items
528+
nested = doc.get(mod.NESTED_COLLECTION_FIELD)
529+
assert isinstance(nested, list)
530+
assert len(nested) == 2
531+
assert nested[0]["sg"] == 1006
532+
assert nested[0]["sd"] == 4052

0 commit comments

Comments
 (0)