|
| 1 | +from datetime import datetime, timedelta, timezone |
| 2 | +from typing import Any, Optional |
| 3 | + |
| 4 | +import boto3 |
| 5 | +import fire |
| 6 | + |
# Low-level DynamoDB client, created once at import time and shared by the module.
dynamodb = boto3.client("dynamodb")
# Paginator for the Scan operation; _count_pointers iterates over its pages.
paginator = dynamodb.get_paginator("scan")
| 9 | + |
| 10 | + |
def _count_pointers(
    table_name: str, ods_code: str, created_date: Optional[str] = None
) -> dict[str, float]:
    """
    Count the number of pointers for a given custodian (ODS code) in the pointers table.

    The table is read with a paginated DynamoDB Scan using ``Select=COUNT``, so
    each page carries only counts and never the items themselves.

    Parameters:
    - table_name: The name of the pointers table to use.
    - ods_code: The custodian ODS code to find pointers for.
    - created_date: Prefix that ``created_on`` must start with. (optional)

    Returns:
    A dict with three entries:
    - "items_found": number of items that matched the filter.
    - "scanned_count": number of items the scan evaluated.
    - "took-secs": wall-clock duration of the scan, in seconds.
    """

    print(f"Counting pointers for {ods_code} in table {table_name}....")  # noqa

    params: dict[str, Any] = {
        "TableName": table_name,
        "FilterExpression": "custodian = :ods_code",
        "ExpressionAttributeValues": {":ods_code": {"S": ods_code}},
        "Select": "COUNT",
        "PaginationConfig": {"PageSize": 100},
    }

    if created_date:
        params["FilterExpression"] += " AND starts_with(created_on, :created_date)"
        params["ExpressionAttributeValues"][":created_date"] = {"S": created_date}

    custodian_pointers_count = 0
    total_scanned_count = 0

    # Progress is reported each time the running scanned total crosses another
    # multiple of `progress_step`. Testing `total % 1000 == 0` instead would go
    # silent for the rest of the run after a single short page, and would print
    # a spurious dot for an empty page.
    progress_step = 1000
    next_progress_mark = progress_step

    start_time = datetime.now(tz=timezone.utc)

    for page in paginator.paginate(**params):
        custodian_pointers_count += page["Count"]
        total_scanned_count += page["ScannedCount"]

        while total_scanned_count >= next_progress_mark:
            print(".", end="", flush=True)  # noqa
            next_progress_mark += progress_step

    end_time = datetime.now(tz=timezone.utc)
    elapsed: timedelta = end_time - start_time

    print(" Done")  # noqa
    return {
        "items_found": custodian_pointers_count,
        "scanned_count": total_scanned_count,
        "took-secs": elapsed.total_seconds(),
    }
| 56 | + |
| 57 | + |
# Script entry point: hand the counting function to python-fire, which builds
# the command line interface from the function's parameters.
if __name__ == "__main__":
    fire.Fire(_count_pointers)
0 commit comments