@@ -69,7 +69,7 @@ def process_chunk(
     process_icon: bool,
     total_rows: int,
 ):
-    chunk_info = f"Chunk {df_chunk.index[0]}-{df_chunk.index[-1]}/{total_rows}"
+    chunk_info = f"{store=} chunk={df_chunk.index[0]}-{df_chunk.index[-1]}/{total_rows}"
     logger.info(f"{chunk_info} start")
     database_connection = get_db_connection(use_ssh_tunnel=use_ssh_tunnel)
     try:
@@ -99,6 +99,7 @@ def process_chunk(
             process_icon=process_icon,
             df_chunk=df_chunk,
         )
+        logger.info(f"{chunk_info} finished")
     except Exception as e:
         logger.exception(f"{chunk_info} error processing with {e}")
     finally:
@@ -118,7 +119,7 @@ def update_app_details(
     country_priority_group,
 ):
     """Process apps with dynamic work queue - simple and efficient."""
-    log_info = f"Update app details: {store=}"
+    log_info = f"{store=} update app details"
 
     df = query_store_apps_to_update(
         store=store,
@@ -145,7 +146,7 @@ def update_app_details(
         chunks.append(country_df.iloc[i : i + chunk_size])
     total_chunks = len(chunks)
     total_rows = len(df)
-    logger.info(f"Processing {total_rows} apps in {total_chunks} chunks")
+    logger.info(f"{log_info} processing {total_rows} apps in {total_chunks} chunks")
 
     completed_count = 0
     failed_count = 0
@@ -158,37 +159,26 @@ def update_app_details(
                 process_chunk, df_chunk, store, use_ssh_tunnel, process_icon, total_rows
             )
             future_to_idx[future] = idx
-
             # Only stagger the initial batch to avoid simultaneous API burst
             if idx < workers:
                 time.sleep(0.5)  # 500ms between initial worker starts
-
-        logger.info(f"All {total_chunks} chunks submitted (first {workers} staggered)")
-
+        logger.info(f"{log_info} all {total_chunks} chunks submitted")
         # Process results as they complete
         for future in as_completed(future_to_idx):
             chunk_idx = future_to_idx[future]
-
             try:
                 _result = future.result()
                 completed_count += 1
-
                 if completed_count % 10 == 0 or completed_count == total_chunks:
                     logger.info(
                         f"Progress: {completed_count}/{total_chunks} chunks "
                         f"({completed_count / total_chunks * 100:.1f}%) | "
                         f"Failed: {failed_count}"
                     )
-
             except Exception as e:
                 failed_count += 1
                 logger.error(f"Chunk {chunk_idx} failed: {e}")
-
-    logger.info(
-        f"{log_info} finished | Completed: {completed_count} | Failed: {failed_count}"
-    )
-
-    return completed_count, failed_count
+    logger.info(f"{log_info} completed={completed_count} failed={failed_count}")
 
 
 def crawl_keyword_cranks(database_connection: PostgresCon) -> None:
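For orientation, below is a minimal, self-contained sketch of the submit-then-as_completed worker-pool pattern whose logging the hunk above reworks: the first batch of submissions is staggered, and failures are counted rather than re-raised. Names such as work_items and handle_item are illustrative stand-ins, not code from this repository.

import logging
import time
from concurrent.futures import ThreadPoolExecutor, as_completed

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def handle_item(item: int) -> int:
    # Stand-in for process_chunk: pretend to do some I/O-bound work.
    time.sleep(0.1)
    return item * 2


def run_pool(work_items: list[int], workers: int = 4) -> tuple[int, int]:
    completed_count = 0
    failed_count = 0
    with ThreadPoolExecutor(max_workers=workers) as executor:
        future_to_idx = {}
        for idx, item in enumerate(work_items):
            future_to_idx[executor.submit(handle_item, item)] = idx
            # Stagger only the first batch so workers don't all hit an API at once.
            if idx < workers:
                time.sleep(0.5)
        # Collect results as they finish, regardless of submission order.
        for future in as_completed(future_to_idx):
            try:
                future.result()
                completed_count += 1
            except Exception as e:
                failed_count += 1
                logger.error(f"Item {future_to_idx[future]} failed: {e}")
    logger.info(f"completed={completed_count} failed={failed_count}")
    return completed_count, failed_count


if __name__ == "__main__":
    run_pool(list(range(10)))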
@@ -592,7 +582,7 @@ def scrape_app(
     country: str,
     language: str,
 ) -> dict:
-    scrape_info = f"{store=}, {country=}, {language=}, {store_id=}, "
+    scrape_info = f"{store=}, {country=}, {language=}, {store_id=}"
     max_retries = 2
     base_delay = 1
     retries = 0
@@ -646,17 +636,17 @@ def scrape_app(
     result_dict["store_id"] = store_id
     result_dict["queried_language"] = language.lower()
     result_dict["country"] = country.upper()
-    logger.info(f"{scrape_info} result={crawl_result} scrape finished")
+    logger.debug(f"{scrape_info} result={crawl_result} scrape finished")
     return result_dict
 
 
 def save_developer_info(
     apps_df: pd.DataFrame,
     database_connection: PostgresCon,
 ) -> pd.DataFrame:
-    assert apps_df["developer_id"].to_numpy()[0], (
-        f" {apps_df['store_id']} Missing Developer ID"
-    )
+    assert apps_df["developer_id"].to_numpy()[
+        0
+    ], f" {apps_df['store_id']} Missing Developer ID"
     df = (
         apps_df[["store", "developer_id", "developer_name"]]
         .rename(columns={"developer_name": "name"})
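The scrape_app hunks only show the retry knobs (max_retries = 2, base_delay = 1) and the log-level change; the retry loop itself sits outside this diff. As a rough sketch only, assuming the delay doubles per attempt; fetch_once is a hypothetical stand-in for the actual store API call, not code from this repository.

import logging
import random
import time

logger = logging.getLogger(__name__)


def fetch_once(store_id: str) -> dict:
    # Hypothetical stand-in for the real store API call.
    if random.random() < 0.5:
        raise ConnectionError("transient failure")
    return {"store_id": store_id}


def fetch_with_retries(store_id: str, max_retries: int = 2, base_delay: float = 1.0) -> dict:
    retries = 0
    while True:
        try:
            return fetch_once(store_id)
        except Exception as e:
            if retries >= max_retries:
                # Out of attempts: surface the last error to the caller.
                raise
            delay = base_delay * (2**retries)  # 1s, 2s, ...
            logger.warning(f"store_id={store_id} retry {retries + 1} in {delay}s: {e}")
            time.sleep(delay)
            retries += 1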