Skip to content

Commit 7b902b3

Browse files
committed
[NRL-1146] Add report to count pointers for a custodian
1 parent fc29146 commit 7b902b3

File tree

1 file changed

+59
-0
lines changed

1 file changed

+59
-0
lines changed
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
from datetime import datetime, timedelta, timezone
2+
from typing import Any, Optional
3+
4+
import boto3
5+
import fire
6+
7+
# Low-level DynamoDB client and its Scan paginator, created once when the
# module is loaded and reused by the report function below.
dynamodb = boto3.client(service_name="dynamodb")
paginator = dynamodb.get_paginator(operation_name="scan")
9+
10+
11+
def _count_pointers(
    table_name: str, ods_code: str, created_date: Optional[str] = None
) -> dict[str, float]:
    """
    Count the number of pointers for a given custodian (ODS code) in the pointers table.

    Runs a paginated DynamoDB Scan (``Select=COUNT``) through the module-level
    ``paginator`` and sums the per-page ``Count`` and ``ScannedCount`` values.
    One progress dot is printed for every 1000 items scanned.

    Parameters:
    - table_name: The name of the pointers table to use.
    - ods_code: The custodian ODS code to find pointers for.
    - created_date: The created date to filter pointers on. (optional)
      Matched as a prefix of the ``created_on`` attribute.

    Returns:
    A dict with "items_found" (sum of page ``Count``), "scanned_count"
    (sum of page ``ScannedCount``) and "took-secs" (elapsed wall-clock seconds).
    """
    # This function is exposed through Fire, which infers argument types from
    # the command-line text: an all-digit value arrives as an int. DynamoDB
    # "S" attribute values must be strings, so coerce before building params.
    ods_code = str(ods_code)

    print(f"Counting pointers for {ods_code} in table {table_name}....")  # noqa

    params: dict[str, Any] = {
        "TableName": table_name,
        "FilterExpression": "custodian = :ods_code",
        "ExpressionAttributeValues": {":ods_code": {"S": ods_code}},
        "Select": "COUNT",
        "PaginationConfig": {"PageSize": 100},
    }

    if created_date:
        params["FilterExpression"] += " AND starts_with(created_on, :created_date)"
        params["ExpressionAttributeValues"][":created_date"] = {
            "S": str(created_date)
        }

    progress_every = 1000
    custodian_pointers_count = 0
    total_scanned_count = 0
    dots_printed = 0

    start_time = datetime.now(tz=timezone.utc)

    for page in paginator.paginate(**params):
        custodian_pointers_count += page["Count"]
        total_scanned_count += page["ScannedCount"]

        # Pages are not guaranteed to scan exactly PageSize items, so the
        # running total may never land exactly on a multiple of 1000. Print
        # one dot per threshold crossed instead of testing for an exact hit.
        dots_due = total_scanned_count // progress_every
        if dots_due > dots_printed:
            print("." * (dots_due - dots_printed), end="", flush=True)  # noqa
            dots_printed = dots_due

    end_time = datetime.now(tz=timezone.utc)
    elapsed: timedelta = end_time - start_time

    print(" Done")  # noqa
    return {
        "items_found": custodian_pointers_count,
        "scanned_count": total_scanned_count,
        "took-secs": elapsed.total_seconds(),
    }
56+
57+
58+
if __name__ == "__main__":
    # Expose the report as a command-line tool; Fire maps CLI arguments onto
    # the parameters of _count_pointers and prints the returned dict.
    fire.Fire(component=_count_pointers)

0 commit comments

Comments
 (0)