Skip to content

Commit d8b0a0e

Browse files
committed
[NRL-1606] Add report to get masterids for pointers
1 parent a4aeccd commit d8b0a0e

File tree

1 file changed

+80
-0
lines changed

1 file changed

+80
-0
lines changed
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
import json
2+
from datetime import datetime, timedelta, timezone
3+
from typing import Any
4+
5+
import boto3
6+
import fire
7+
8+
# Low-level DynamoDB client, created once at import time. No region or
# credentials are passed here, so whatever the ambient boto3 configuration
# provides is what gets used.
dynamodb = boto3.client("dynamodb")
9+
# Paginator for the DynamoDB Scan operation, built from the module-level
# client and shared by the report function in this file.
paginator = dynamodb.get_paginator("scan")
10+
11+
12+
# Progress is printed each time the running scanned count crosses a multiple
# of these values (a dot, and a full summary line, respectively).
_PROGRESS_DOT_EVERY = 1000
_PROGRESS_SUMMARY_EVERY = 100000

# DynamoDB rejects an IN comparison with more operands than this.
_MAX_IN_OPERANDS = 100

# Report destination, relative to the current working directory.
_OUTPUT_FILE = "pointer-masterids.txt"


def _normalise_custodians(custodians: Any) -> list[str]:
    """Return the requested custodians as a de-duplicated list of non-empty strings."""
    if custodians is None:
        raw: list[str] = []
    elif isinstance(custodians, (list, tuple, set, frozenset)):
        raw = [str(code) for code in custodians]
    else:
        # A single CLI value arrives as one scalar (a str, or an int when the
        # code is all digits). Iterating it directly would split it into
        # characters, so treat it as a comma-separated list instead.
        raw = str(custodians).split(",")

    # dict.fromkeys de-duplicates while keeping the caller's order.
    codes = [code for code in dict.fromkeys(c.strip() for c in raw) if code]

    if not codes:
        raise ValueError("At least one custodian must be provided")
    if len(codes) > _MAX_IN_OPERANDS:
        raise ValueError(
            f"Too many custodians ({len(codes)}); "
            f"DynamoDB IN accepts at most {_MAX_IN_OPERANDS} values"
        )
    return codes


def _build_scan_params(table_name: str, custodian_codes: list[str]) -> dict[str, Any]:
    """Build the paginated Scan arguments that filter on the given custodians."""
    # Placeholders are numbered rather than derived from the custodian value,
    # so a value containing punctuation or spaces cannot yield an invalid
    # expression attribute name.
    expression_values = {
        f":custodian_{index}": {"S": code}
        for index, code in enumerate(custodian_codes)
    }
    placeholders = ",".join(expression_values)

    return {
        "TableName": table_name,
        "PaginationConfig": {"PageSize": 50},
        "FilterExpression": f"custodian IN ({placeholders})",
        "ExpressionAttributeValues": expression_values,
        "ProjectionExpression": "id, type_id, master_identifier",
    }


def _get_masterids_for_custodians(
    table_name: str, custodians: "str | list[str] | tuple[str, ...]"
) -> dict[str, Any]:
    """
    Get masterids for pointers in the given table for a list of custodians.

    Scans the whole table (the custodian filter is applied by DynamoDB after
    each page is read), collects id, type and master identifier for every
    matching pointer, and writes them as JSON to ./pointer-masterids.txt.

    Parameters:
    - table_name: The name of the pointers table to use.
    - custodians: The custodians to report on. Either a single value, a
      comma-separated string, or a list/tuple of values.

    Returns:
    A summary dict with the keys "output-file", "pointers-found",
    "scanned-count" and "took-secs".

    Raises:
    ValueError if no custodian is given, or more than 100 are given.
    """
    custodian_codes = _normalise_custodians(custodians)

    print(  # noqa
        f"Getting masterids for custodians {custodian_codes} in table {table_name}...."
    )

    params = _build_scan_params(table_name, custodian_codes)

    pointers_info: list[dict[str, str]] = []
    total_scanned_count = 0

    start_time = datetime.now(tz=timezone.utc)

    for page in paginator.paginate(**params):
        for item in page["Items"]:
            pointers_info.append(
                {
                    "nrl-id": item.get("id", {}).get("S", "no-id"),
                    "pointer-type": item.get("type_id", {}).get("S", "no-type"),
                    "master_identifier": item.get("master_identifier", {}).get(
                        "S", "no-master-id"
                    ),
                }
            )

        previous_count = total_scanned_count
        total_scanned_count += page["ScannedCount"]

        # Report when a threshold is *crossed* rather than hit exactly: pages
        # are not guaranteed to scan a fixed number of items, so the running
        # total may never land on an exact multiple.
        if (
            total_scanned_count // _PROGRESS_DOT_EVERY
            > previous_count // _PROGRESS_DOT_EVERY
        ):
            print(".", end="", flush=True)  # noqa

        if (
            total_scanned_count // _PROGRESS_SUMMARY_EVERY
            > previous_count // _PROGRESS_SUMMARY_EVERY
        ):
            print(f"scanned={total_scanned_count} found={len(pointers_info)} ")  # noqa

    end_time = datetime.now(tz=timezone.utc)

    print(" Done")  # noqa

    print(f"Writing pointers to file ./{_OUTPUT_FILE} ...")  # noqa
    with open(_OUTPUT_FILE, "w", encoding="utf-8") as f:
        f.write(json.dumps(pointers_info, indent=2))

    return {
        "output-file": _OUTPUT_FILE,
        "pointers-found": len(pointers_info),
        "scanned-count": total_scanned_count,
        "took-secs": (end_time - start_time).total_seconds(),
    }
77+
78+
79+
# Script entry point: hand the report function to python-fire so it can be
# invoked from the command line.
if __name__ == "__main__":
    fire.Fire(_get_masterids_for_custodians)

0 commit comments

Comments
 (0)