|
18 | 18 | ) |
19 | 19 | from nrlf.core.dynamodb.model import DocumentPointer |
20 | 20 | from nrlf.core.logger import logger |
21 | | -from nrlf.core.validators import DocumentReferenceValidator |
22 | 21 | from nrlf.tests.data import load_document_reference |
23 | 22 |
|
24 | 23 | dynamodb = boto3.client("dynamodb") |
|
89 | 88 | }, # summary record currently has only one supplier |
90 | 89 | } |
91 | 90 |
|
| 91 | +DEFAULT_COUNT_DISTRIBUTIONS = {"1": 91, "2": 8, "3": 1} |
| 92 | + |
92 | 93 |
|
93 | 94 | class TestNhsNumbersIterator: |
94 | 95 | def __iter__(self): |
@@ -143,41 +144,52 @@ def _populate_seed_table( |
143 | 144 | table_name: str, |
144 | 145 | px_with_pointers: int, |
145 | 146 | pointers_per_px: float = 1.0, |
146 | | - type_dists=DEFAULT_TYPE_DISTRIBUTIONS, |
147 | | - custodian_dists=DEFAULT_CUSTODIAN_DISTRIBUTIONS, |
| 147 | + type_dists: dict[str, int] = DEFAULT_TYPE_DISTRIBUTIONS, |
| 148 | + custodian_dists: dict[str, int] = DEFAULT_CUSTODIAN_DISTRIBUTIONS, |
148 | 149 | ): |
149 | | - |
150 | | - table = resource.Table(table_name) |
151 | | - |
| 150 | + """ |
| 151 | + Seeds a table with example data for non-functional testing. |
| 152 | + """ |
| 153 | + if pointers_per_px < 1.0: |
| 154 | + raise ValueError("Cannot populate table with patients with zero pointers") |
152 | 155 | # set up iterations |
153 | 156 | type_iter = _set_up_cyclical_iterator(type_dists) |
154 | 157 | custodian_iters = _set_up_custodian_iterators(custodian_dists) |
| 158 | + count_iter = _set_up_cyclical_iterator(DEFAULT_COUNT_DISTRIBUTIONS) |
155 | 159 | testnum_cls = TestNhsNumbersIterator() |
156 | 160 | testnum_iter = iter(testnum_cls) |
157 | 161 |
|
158 | 162 | px_counter = 0 |
159 | | - doc_ref_target = pointers_per_px * px_with_pointers |
| 163 | + doc_ref_target = int(pointers_per_px * px_with_pointers) |
160 | 164 | print( |
161 | 165 | f"Will upsert {doc_ref_target} test pointers for {px_with_pointers} patients." |
162 | 166 | ) |
163 | 167 | doc_ref_counter = 0 |
| 168 | + batch_counter = 0 |
164 | 169 |
|
165 | 170 | start_time = datetime.now(tz=timezone.utc) |
166 | 171 |
|
167 | | - while px_counter < px_with_pointers: |
| 172 | + batch_upsert_items = [] |
| 173 | + while px_counter <= px_with_pointers: |
| 174 | + pointers_for_px = int(next(count_iter)) |
| 175 | + if batch_counter + pointers_for_px > 25 or px_counter == px_with_pointers: |
| 176 | + resource.batch_write_item(RequestItems={table_name: batch_upsert_items}) |
| 177 | + batch_upsert_items = [] |
| 178 | + batch_counter = 0 |
| 179 | + |
168 | 180 | new_px = next(testnum_iter) |
169 | | - new_type = next(type_iter) |
170 | | - new_custodian = next(custodian_iters[new_type]) |
171 | | - px_counter += 1 |
172 | | - doc_ref_counter += 1 |
173 | | - try: |
174 | | - print(f"Putting item {doc_ref_counter}....") |
| 181 | + for _ in range(pointers_for_px): |
| 182 | + new_type = next(type_iter) |
| 183 | + new_custodian = next(custodian_iters[new_type]) |
| 184 | + doc_ref_counter += 1 |
| 185 | + batch_counter += 1 |
| 186 | + |
175 | 187 | pointer = _make_seed_pointer( |
176 | 188 | new_type, new_custodian, new_px, doc_ref_counter |
177 | 189 | ) |
178 | | - table.put_item(Item=pointer.model_dump()) |
179 | | - except Exception as e: |
180 | | - print(f"Unable to upsert pointer for item {doc_ref_counter}. Error: {e}") |
| 190 | + put_req = {"PutRequest": {"Item": pointer.model_dump()}} |
| 191 | + batch_upsert_items.append(put_req) |
| 192 | + px_counter += 1 |
181 | 193 |
|
182 | 194 | end_time = datetime.now(tz=timezone.utc) |
183 | 195 | print( |
@@ -211,5 +223,19 @@ def _set_up_custodian_iterators( |
211 | 223 | return custodian_iters |
212 | 224 |
|
213 | 225 |
|
def _set_up_count_iterator(pointers_per_px: float) -> iter:
    """
    Build a cyclical iterator of per-patient pointer counts.

    Given a target average number of pointers per patient, derives weights
    (out of 100 patients) for patients holding 1, 2 or 3 pointers, such that
    the weighted total is ``100 * pointers_per_px`` pointers. For example a
    target of 1.1 yields ``{"3": 1, "2": 8, "1": 91}``.

    Args:
        pointers_per_px: Target mean number of pointers per patient. Must be
            at least 1.0, because no patient can have zero pointers.

    Returns:
        The result of ``_set_up_cyclical_iterator`` for the computed weights,
        whose keys are the counts as strings ("1", "2", "3").

    Raises:
        ValueError: If the target is below 1.0, or so high that the number
            of single-pointer patients would become negative.
    """
    # round() rather than int(): (1.2 - 1.0) * 100 evaluates to
    # 19.999999999999996, which int() would truncate to 19.
    extra_pointers = round((pointers_per_px - 1.0) * 100)
    if extra_pointers < 0:
        raise ValueError("Cannot populate table with patients with zero pointers")

    # Roughly a tenth of the surplus goes to three-pointer patients (each
    # contributes two extra pointers); the rest goes to two-pointer patients.
    three_pointer_px = extra_pointers // 10
    two_pointer_px = extra_pointers - 2 * three_pointer_px
    one_pointer_px = 100 - two_pointer_px - three_pointer_px
    if one_pointer_px < 0:
        raise ValueError(
            f"Cannot build a count distribution for pointers_per_px={pointers_per_px}"
        )

    # Keys are strings throughout, so lookups and the downstream iterator
    # agree on the key type.
    counts = {
        "3": three_pointer_px,
        "2": two_pointer_px,
        "1": one_pointer_px,
    }
    return _set_up_cyclical_iterator(counts)
| 238 | + |
| 239 | + |
214 | 240 | if __name__ == "__main__": |
215 | 241 | fire.Fire(_populate_seed_table) |
0 commit comments