Commit cf42d03

feat: functional tabulator view of upgrade status
1 parent 071f00f

File tree: 1 file changed (+31 -11 lines)

src/aind_metadata_viz/upgrade.py

Lines changed: 31 additions & 11 deletions
@@ -1,17 +1,15 @@
-import logging
 from aind_data_access_api.rds_tables import RDSCredentials, Client
 from aind_data_access_api.document_db import MetadataDbClient
 import pandas as pd
 import panel as pn
-
-# Configure logging
-logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
-logger = logging.getLogger(__name__)
+from aind_metadata_upgrader.upgrade import Upgrade
 
 # Redshift settings
 REDSHIFT_SECRETS = "/aind/prod/redshift/credentials/readwrite"
 RDS_TABLE_NAME = "metadata_upgrade_status_prod"
 
+pn.extension('tabulator')
+
 
 extra_columns = {
     "_id": 1,
@@ -23,47 +21,69 @@
 
 @pn.cache()
 def get_extra_col_df():
+    print("Retrieving extra columns from DocDB...")
     client = MetadataDbClient(
         host="api.allenneuraldynamics.org",
         version="v1",
     )
-    records = client.retrieve_docdb_records(
+
+    all_records = client.retrieve_docdb_records(
         filter_query={},
-        projection=extra_columns,
+        projection={"_id": 1},
         limit=0,
     )
+    all_ids = [record["_id"] for record in all_records]
+
+    # Batch by 100 to avoid excessively large queries
+    batch_size = 100
+
+    records = []
+    for start_idx in range(0, len(all_ids), batch_size):
+        print(f"Retrieving records {start_idx} to {start_idx + batch_size}...")
+        end_idx = start_idx + batch_size
+        batch_ids = all_ids[start_idx:end_idx]
+        filter_query = {"_id": {"$in": batch_ids}}
+        batch_records = client.retrieve_docdb_records(
+            filter_query=filter_query,
+            projection=extra_columns,
+            limit=0,
+        )
+        records.extend(batch_records)
+
     for i, record in enumerate(records):
         data_description = record.get("data_description", {})
         if data_description:
             record["data_level"] = data_description.get("data_level", None)
             record["project_name"] = data_description.get("project_name", None)
             record.pop("data_description")
 
-        record.pop("_id")
         records[i] = record
+    print(f"Retrieved {len(records)} records from DocDB.")
     return pd.DataFrame(records)
 
 
 @pn.cache()
 def get_redshift_table():
+    print("Connecting to Redshift RDS...")
     rds_client = Client(
         credentials=RDSCredentials(
             aws_secrets_name=REDSHIFT_SECRETS,
         ),
     )
     df = rds_client.read_table(RDS_TABLE_NAME)
+    print(f"Retrieved {len(df)} records from Redshift table.")
     return df
 
 
 @pn.cache()
 def get_data():
-    logger.info("Loading extra columns from DocDB...")
+    print("Loading extra columns from DocDB...")
     extra_col_df = get_extra_col_df()
-    logger.info("Loading Redshift table...")
+    print("Loading Redshift table...")
     df = get_redshift_table()
     if df is None or len(df) == 0:
         return pn.pane.Markdown("**Table is empty or could not be read**")
-    logger.info("Merging extra columns...")
+    print("Merging extra columns...")
     df = df.merge(extra_col_df, how="left", left_on="v1_id", right_on="_id")
     return df
 
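The batched $in lookup is the heart of this change: instead of projecting every extra column in one unbounded query, get_extra_col_df now pulls only the _id values and re-queries in chunks of 100. The same pattern as a standalone sketch, with a generic fetch callable standing in for retrieve_docdb_records (the helper name and signature are illustrative, not from the repo):

def fetch_in_batches(all_ids, fetch, batch_size=100):
    # Mirrors the loop added above: chunk the id list so no single
    # query carries an oversized {"$in": [...]} filter.
    records = []
    for start_idx in range(0, len(all_ids), batch_size):
        batch_ids = all_ids[start_idx:start_idx + batch_size]
        records.extend(fetch({"_id": {"$in": batch_ids}}))
    return records

With that helper, the loop in get_extra_col_df would reduce to records = fetch_in_batches(all_ids, lambda q: client.retrieve_docdb_records(filter_query=q, projection=extra_columns, limit=0)).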

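The commit title promises a tabulator view and the diff enables pn.extension('tabulator'), but the widget wiring itself is not in this hunk. A minimal sketch of how the merged frame could be served, assuming the get_data defined above; the Tabulator options and the servable wiring are assumptions, not code from this commit:

import panel as pn

from aind_metadata_viz.upgrade import get_data  # defined in the diff above

pn.extension('tabulator')

data = get_data()

# get_data() returns a Markdown pane when the Redshift table is empty,
# so only build the Tabulator when a DataFrame actually came back.
if isinstance(data, pn.pane.Markdown):
    view = data
else:
    view = pn.widgets.Tabulator(
        data,
        pagination="remote",  # page rows server-side for large tables
        page_size=50,
        show_index=False,
    )

view.servable()

Served with panel serve, this would give the browsable upgrade-status table the commit title describes.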