Skip to content

Commit d680f92

Browse files
committed
NRL-1665 batch write requests and support multiple pointers per patient
1 parent dbd034f commit d680f92

File tree

1 file changed

+43
-17
lines changed

1 file changed

+43
-17
lines changed

scripts/seed_nft_tables.py

Lines changed: 43 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
)
1919
from nrlf.core.dynamodb.model import DocumentPointer
2020
from nrlf.core.logger import logger
21-
from nrlf.core.validators import DocumentReferenceValidator
2221
from nrlf.tests.data import load_document_reference
2322

2423
dynamodb = boto3.client("dynamodb")
@@ -89,6 +88,8 @@
8988
}, # summary record currently has only one supplier
9089
}
9190

91+
# Pointer counts per patient, fed to _set_up_cyclical_iterator by
# _populate_seed_table; each value the iterator yields is converted with int()
# and used as the number of pointers created for one patient.
# NOTE(review): the values look like weights out of 100 (91 + 8 + 1), i.e. a
# mean of 1.1 pointers per patient - confirm against _set_up_cyclical_iterator.
DEFAULT_COUNT_DISTRIBUTIONS = {"1": 91, "2": 8, "3": 1}
92+
9293

9394
class TestNhsNumbersIterator:
9495
def __iter__(self):
@@ -143,41 +144,52 @@ def _populate_seed_table(
143144
table_name: str,
144145
px_with_pointers: int,
145146
pointers_per_px: float = 1.0,
146-
type_dists=DEFAULT_TYPE_DISTRIBUTIONS,
147-
custodian_dists=DEFAULT_CUSTODIAN_DISTRIBUTIONS,
147+
type_dists: dict[str, int] = DEFAULT_TYPE_DISTRIBUTIONS,
148+
custodian_dists: dict[str, int] = DEFAULT_CUSTODIAN_DISTRIBUTIONS,
148149
):
149-
150-
table = resource.Table(table_name)
151-
150+
"""
151+
Seeds a table with example data for non-functional testing.
152+
"""
153+
if pointers_per_px < 1.0:
154+
raise ValueError("Cannot populate table with patients with zero pointers")
152155
# set up iterations
153156
type_iter = _set_up_cyclical_iterator(type_dists)
154157
custodian_iters = _set_up_custodian_iterators(custodian_dists)
158+
count_iter = _set_up_cyclical_iterator(DEFAULT_COUNT_DISTRIBUTIONS)
155159
testnum_cls = TestNhsNumbersIterator()
156160
testnum_iter = iter(testnum_cls)
157161

158162
px_counter = 0
159-
doc_ref_target = pointers_per_px * px_with_pointers
163+
doc_ref_target = int(pointers_per_px * px_with_pointers)
160164
print(
161165
f"Will upsert {doc_ref_target} test pointers for {px_with_pointers} patients."
162166
)
163167
doc_ref_counter = 0
168+
batch_counter = 0
164169

165170
start_time = datetime.now(tz=timezone.utc)
166171

167-
while px_counter < px_with_pointers:
172+
batch_upsert_items = []
173+
while px_counter <= px_with_pointers:
174+
pointers_for_px = int(next(count_iter))
175+
if batch_counter + pointers_for_px > 25 or px_counter == px_with_pointers:
176+
resource.batch_write_item(RequestItems={table_name: batch_upsert_items})
177+
batch_upsert_items = []
178+
batch_counter = 0
179+
168180
new_px = next(testnum_iter)
169-
new_type = next(type_iter)
170-
new_custodian = next(custodian_iters[new_type])
171-
px_counter += 1
172-
doc_ref_counter += 1
173-
try:
174-
print(f"Putting item {doc_ref_counter}....")
181+
for _ in range(pointers_for_px):
182+
new_type = next(type_iter)
183+
new_custodian = next(custodian_iters[new_type])
184+
doc_ref_counter += 1
185+
batch_counter += 1
186+
175187
pointer = _make_seed_pointer(
176188
new_type, new_custodian, new_px, doc_ref_counter
177189
)
178-
table.put_item(Item=pointer.model_dump())
179-
except Exception as e:
180-
print(f"Unable to upsert pointer for item {doc_ref_counter}. Error: {e}")
190+
put_req = {"PutRequest": {"Item": pointer.model_dump()}}
191+
batch_upsert_items.append(put_req)
192+
px_counter += 1
181193

182194
end_time = datetime.now(tz=timezone.utc)
183195
print(
@@ -211,5 +223,19 @@ def _set_up_custodian_iterators(
211223
return custodian_iters
212224

213225

226+
def _set_up_count_iterator(pointers_per_px: float) -> iter:
    """
    Given a target average number of pointers per patient,
    generates a distribution of counts per individual patient.

    The distribution is computed per 100 patients, each of whom receives 1, 2
    or 3 pointers, and is passed to _set_up_cyclical_iterator as a dict keyed
    by the count as a string (the same shape as DEFAULT_COUNT_DISTRIBUTIONS).
    A target of 1.1 yields {"1": 91, "2": 8, "3": 1}.

    Args:
        pointers_per_px: target mean number of pointers per patient, between
            1.0 and 3.0 inclusive.

    Returns:
        The iterator built by _set_up_cyclical_iterator from the distribution.

    Raises:
        ValueError: if the target mean cannot be met with 1-3 pointers per
            patient.
    """
    if not 1.0 <= pointers_per_px <= 3.0:
        raise ValueError(
            "pointers_per_px must be between 1.0 and 3.0, "
            f"got {pointers_per_px!r}"
        )

    # Every patient has at least one pointer, so only the surplus over 1.0 has
    # to be shared out. round() rather than int(): (1.15 - 1.0) * 100 evaluates
    # to 14.999999999999991, which int() would truncate to 14.
    extra_pointers = round((pointers_per_px - 1.0) * 100)

    # A patient with three pointers absorbs two surplus pointers, one with two
    # absorbs one. Roughly a tenth of the surplus goes to three-pointer
    # patients, but never fewer than (extra_pointers - 100), otherwise the
    # number of single-pointer patients below would go negative.
    three_pointer_px = max(extra_pointers // 10, extra_pointers - 100)
    two_pointer_px = extra_pointers - 2 * three_pointer_px
    one_pointer_px = 100 - two_pointer_px - three_pointer_px

    # String keys throughout: the previous version wrote "2"/"3" but read back
    # counts[2]/counts[3], which raised KeyError on every call.
    counts = {
        "1": one_pointer_px,
        "2": two_pointer_px,
        "3": three_pointer_px,
    }
    return _set_up_cyclical_iterator(counts)
238+
239+
214240
if __name__ == "__main__":
215241
fire.Fire(_populate_seed_table)

0 commit comments

Comments
 (0)