@@ -262,13 +262,14 @@ def prep_app_metrics_history(
262262 return df
263263
264264
265- def manual_import_app_metrics_from_s3 () -> None :
265+ def manual_import_app_metrics_from_s3 (
266+ start_date : datetime .date , end_date : datetime .date
267+ ) -> None :
266268 use_tunnel = False
267269 database_connection = get_db_connection (
268270 use_ssh_tunnel = use_tunnel , config_key = "madrone"
269271 )
270- start_date = datetime .date (2025 , 10 , 31 )
271- end_date = datetime .date (2025 , 10 , 31 )
272+
272273 for snapshot_date in pd .date_range (start_date , end_date , freq = "D" ):
273274 snapshot_date = snapshot_date .date ()
274275 for store in [1 , 2 ]:
@@ -297,9 +298,19 @@ def process_app_metrics_to_db(
297298 logger .info (f"date={ snapshot_date } , store={ store } agg df load" )
298299 df = get_s3_agg_daily_snapshots (snapshot_date , snapshot_date , store )
299300 if store == 2 :
300- df .loc [df ["store_id" ].str .contains (".0" ), "store_id" ] = (
301- df .loc [df ["store_id" ].str .contains (".0" ), "store_id" ].str .split ("." ).str [0 ]
302- )
301+ # The ".0" suffix on store_id (handled below) should be resolved in data from 11/1/2025 onward
302+ problem_rows = df ["store_id" ].str .contains (".0" )
303+ if problem_rows .any ():
304+ logger .warning (
305+ f'Apple App IDs: Found { problem_rows .sum ()} store_id with ".0" suffix, fixing'
306+ )
307+ df .loc [problem_rows , "store_id" ] = (
308+ df .loc [problem_rows , "store_id" ].str .split ("." ).str [0 ]
309+ )
310+ df ["crawled_at" ] = df ["crawled_at" ].sort_values (ascending = True )
311+ df = df .drop_duplicates (
312+ ["snapshot_date" , "country" , "store_id" ], keep = "last"
313+ )
303314 if df .empty :
304315 logger .warning (
305316 f"No data found for S3 agg app metrics { store = } { snapshot_date = } "
0 commit comments