Skip to content

Commit bd6fcac

Browse files
committed
Clean warning message
1 parent 0ec6d30 commit bd6fcac

File tree

1 file changed

+11
-21
lines changed

1 file changed

+11
-21
lines changed

adscrawler/app_stores/scrape_stores.py

Lines changed: 11 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -69,7 +69,7 @@ def process_chunk(
6969
process_icon: bool,
7070
total_rows: int,
7171
):
72-
chunk_info = f"Chunk {df_chunk.index[0]}-{df_chunk.index[-1]}/{total_rows}"
72+
chunk_info = f"{store=} chunk={df_chunk.index[0]}-{df_chunk.index[-1]}/{total_rows}"
7373
logger.info(f"{chunk_info} start")
7474
database_connection = get_db_connection(use_ssh_tunnel=use_ssh_tunnel)
7575
try:
@@ -99,6 +99,7 @@ def process_chunk(
9999
process_icon=process_icon,
100100
df_chunk=df_chunk,
101101
)
102+
logger.info(f"{chunk_info} finished")
102103
except Exception as e:
103104
logger.exception(f"{chunk_info} error processing with {e}")
104105
finally:
@@ -118,7 +119,7 @@ def update_app_details(
118119
country_priority_group,
119120
):
120121
"""Process apps with dynamic work queue - simple and efficient."""
121-
log_info = f"Update app details: {store=}"
122+
log_info = f"{store=} update app details"
122123

123124
df = query_store_apps_to_update(
124125
store=store,
@@ -145,7 +146,7 @@ def update_app_details(
145146
chunks.append(country_df.iloc[i : i + chunk_size])
146147
total_chunks = len(chunks)
147148
total_rows = len(df)
148-
logger.info(f"Processing {total_rows} apps in {total_chunks} chunks")
149+
logger.info(f"{log_info} processing {total_rows} apps in {total_chunks} chunks")
149150

150151
completed_count = 0
151152
failed_count = 0
@@ -158,37 +159,26 @@ def update_app_details(
158159
process_chunk, df_chunk, store, use_ssh_tunnel, process_icon, total_rows
159160
)
160161
future_to_idx[future] = idx
161-
162162
# Only stagger the initial batch to avoid simultaneous API burst
163163
if idx < workers:
164164
time.sleep(0.5) # 500ms between initial worker starts
165-
166-
logger.info(f"All {total_chunks} chunks submitted (first {workers} staggered)")
167-
165+
logger.info(f"{log_info} all {total_chunks} chunks submitted")
168166
# Process results as they complete
169167
for future in as_completed(future_to_idx):
170168
chunk_idx = future_to_idx[future]
171-
172169
try:
173170
_result = future.result()
174171
completed_count += 1
175-
176172
if completed_count % 10 == 0 or completed_count == total_chunks:
177173
logger.info(
178174
f"Progress: {completed_count}/{total_chunks} chunks "
179175
f"({completed_count / total_chunks * 100:.1f}%) | "
180176
f"Failed: {failed_count}"
181177
)
182-
183178
except Exception as e:
184179
failed_count += 1
185180
logger.error(f"Chunk {chunk_idx} failed: {e}")
186-
187-
logger.info(
188-
f"{log_info} finished | Completed: {completed_count} | Failed: {failed_count}"
189-
)
190-
191-
return completed_count, failed_count
181+
logger.info(f"{log_info} completed={completed_count} failed={failed_count}")
192182

193183

194184
def crawl_keyword_cranks(database_connection: PostgresCon) -> None:
@@ -592,7 +582,7 @@ def scrape_app(
592582
country: str,
593583
language: str,
594584
) -> dict:
595-
scrape_info = f"{store=}, {country=}, {language=}, {store_id=}, "
585+
scrape_info = f"{store=}, {country=}, {language=}, {store_id=}"
596586
max_retries = 2
597587
base_delay = 1
598588
retries = 0
@@ -646,17 +636,17 @@ def scrape_app(
646636
result_dict["store_id"] = store_id
647637
result_dict["queried_language"] = language.lower()
648638
result_dict["country"] = country.upper()
649-
logger.info(f"{scrape_info} result={crawl_result} scrape finished")
639+
logger.debug(f"{scrape_info} result={crawl_result} scrape finished")
650640
return result_dict
651641

652642

653643
def save_developer_info(
654644
apps_df: pd.DataFrame,
655645
database_connection: PostgresCon,
656646
) -> pd.DataFrame:
657-
assert apps_df["developer_id"].to_numpy()[0], (
658-
f"{apps_df['store_id']} Missing Developer ID"
659-
)
647+
assert apps_df["developer_id"].to_numpy()[
648+
0
649+
], f"{apps_df['store_id']} Missing Developer ID"
660650
df = (
661651
apps_df[["store", "developer_id", "developer_name"]]
662652
.rename(columns={"developer_name": "name"})

0 commit comments

Comments (0)