Skip to content

Commit 691bde3

Browse files
authored
Merge pull request #51 from ddxv/main
Merge main
2 parents de83f36 + eb1d184 commit 691bde3

File tree

5 files changed

+23
-25
lines changed

5 files changed

+23
-25
lines changed

README.md

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,16 +18,13 @@ Note: This project is not a one click setup but feel free to reach out for help.
1818
- PostgreSQL: 17/18
1919
- Set up your database. The files here use the database name 'madrone'.
2020
- Add a password to your default db user if you don't have one yet: `ALTER USER postgres WITH PASSWORD 'xxx';`
21-
2221
- Python environment: Python 3.13
2322
- Set up the Python environment: `python3.13 -m venv .virtualenv`, then `source .virtualenv/bin/activate`
2423
- `uv pip install -r pyproject.toml`
2524
- `cp example_config.toml ~/config/adscrawler/config.toml` and edit any needed values. To run everything locally, the main values that need to be modified are the `xxx` placeholder for the Postgres password and the S3 host.
2625
- In your virtualenv, initialize the database with `python db_init.py`. This initializes the MVs and inserts 3M+ apps' store_ids from https://github.com/ddxv/appgoblin-data
27-
2826
- Google Play App Ranks Require: NodeJS
2927
- `npm install --save google-play-scraper`
30-
3128
- an S3 bucket used by app ranks, APK/IPA download, MITM
3229

3330
## Run

adscrawler/app_stores/apple.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -297,6 +297,7 @@ def get_developer_url(result: dict, urls: dict) -> str:
297297
"""
298298
Decide if we should crawl the store html for the developer url.
299299
"""
300+
final_url: str
300301
should_crawl_html = False
301302
if "sellerUrl" not in result.keys():
302303
should_crawl_html = True
@@ -316,14 +317,14 @@ def get_developer_url(result: dict, urls: dict) -> str:
316317
if len(found_tlds) == 0:
317318
if "sellerUrl" not in result.keys():
318319
raise Exception(f"No developer url found for {urls=}")
319-
final_url: str = result["sellerUrl"]
320+
final_url = result["sellerUrl"]
320321
elif len(found_tlds) == 1:
321-
final_url: str = found_tlds[0]
322+
final_url = found_tlds[0]
322323
else:
323324
logger.warning(f"Multiple developer sites found for {urls=} {found_tlds=}")
324-
final_url: str = result["sellerUrl"]
325+
final_url = result["sellerUrl"]
325326
else:
326-
final_url: str = result["sellerUrl"]
327+
final_url = result["sellerUrl"]
327328
return final_url
328329

329330

@@ -454,7 +455,9 @@ def clean_ios_app_df(df: pd.DataFrame) -> pd.DataFrame:
454455
logger.warning("Unable to parse histogram")
455456
df["histogram"] = None
456457
if "description" in df.columns:
457-
df["description"] = df["description"].apply(truncate_utf8_bytes)
458+
df.loc[df["description"].notna(), "description"] = df.loc[
459+
df["description"].notna(), "description"
460+
].apply(truncate_utf8_bytes)
458461
if (
459462
"store_language_code" in df.columns
460463
and df["store_language_code"].str.len().all() == 2

adscrawler/app_stores/google.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -106,8 +106,7 @@ def clean_google_play_app_df(apps_df: pd.DataFrame) -> pd.DataFrame:
106106
apps_df["release_date"], format="%b %d, %Y"
107107
).dt.date,
108108
store_last_updated=pd.to_datetime(
109-
apps_df["store_last_updated"],
110-
unit="s",
109+
apps_df["store_last_updated"], unit="s", errors="coerce"
111110
).fillna(apps_df["release_date"]),
112111
)
113112
if "developer_name" in apps_df.columns:

adscrawler/dbcon/sql/query_simplified_store_app_z_scores.sql

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,8 @@ my_advs AS (
2828
ON
2929
ic.id = icc.company_id
3030
WHERE
31-
vcasr.run_at > :target_week::date - interval '7 days'
32-
AND vcasr.run_at <= :target_week::date
31+
vcasr.run_at > :target_week ::date - interval '7 days'
32+
AND vcasr.run_at <= :target_week ::date
3333
AND (
3434
hcc.category_id = 1
3535
)
@@ -50,10 +50,10 @@ baseline_period AS (
5050
s.store_app = m.advertiser_store_app_id
5151
WHERE
5252
s.week_start >= (
53-
:target_week::date - interval '22 days'
53+
:target_week ::date - interval '22 days'
5454
)
5555
AND s.week_start <= (
56-
:target_week::date - interval '7 days'
56+
:target_week ::date - interval '7 days'
5757
)
5858
AND s.country_id = 840
5959
GROUP BY
@@ -62,16 +62,16 @@ baseline_period AS (
6262
target_weeks_data AS (
6363
SELECT
6464
s.store_app,
65-
:target_week::date AS target_week,
65+
:target_week ::date AS target_week,
6666
SUM(s.installs_diff) AS target_week_installs,
6767
SUM(s.rating_count_diff) AS target_week_rating_count
6868
FROM
6969
store_apps_history_weekly AS s
7070
WHERE
71-
s.week_start >= :target_week::date
71+
s.week_start >= :target_week ::date
7272
AND
7373
s.week_start < (
74-
:target_week::date + interval '7 days'
74+
:target_week ::date + interval '7 days'
7575
)
7676
AND s.country_id = 840
7777
GROUP BY
@@ -135,14 +135,13 @@ ranked_z_scores AS (
135135
sa.content_rating,
136136
sa.ad_supported,
137137
sa.in_app_purchases,
138-
sa.editors_choice,
139138
sa.created_at,
140139
sa.updated_at,
141140
sa.crawl_result,
142141
sa.icon_url_100,
143142
sa.icon_url_512,
144143
sa.release_date,
145-
sa.rating_count,
144+
agm.rating_count,
146145
sa.featured_image_url,
147146
sa.phone_image_url_1,
148147
sa.phone_image_url_2,
@@ -157,16 +156,16 @@ ranked_z_scores AS (
157156
sa.store,
158157
(
159158
CASE
160-
WHEN sa.store = 2 THEN 'rating'::text
161-
ELSE 'installs'::text
159+
WHEN sa.store = 2 THEN 'rating' ::text
160+
ELSE 'installs' ::text
162161
END
163162
)
164163
ORDER BY
165164
(
166165
CASE
167166
WHEN sa.store = 2 THEN saz.ratings_z_score_1w
168167
WHEN sa.store = 1 THEN saz.installs_z_score_1w
169-
ELSE NULL::numeric
168+
ELSE NULL ::numeric
170169
END
171170
) DESC NULLS LAST
172171
) AS rn
@@ -178,7 +177,7 @@ ranked_z_scores AS (
178177
LEFT JOIN app_global_metrics_latest AS agm
179178
ON sa.id = agm.store_app
180179
LEFT JOIN category_mapping AS cm ON
181-
sa.category::text = cm.original_category::text
180+
sa.category ::text = cm.original_category ::text
182181
)
183182
SELECT
184183
target_week,

adscrawler/tools/reports.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@
77
database_connection = get_db_connection(use_ssh_tunnel=use_tunnel)
88

99

10-
start_date = "2025-08-01"
11-
end_date = "2025-09-30"
10+
start_date = "2025-10-01"
11+
end_date = "2025-10-31"
1212
for week in pd.date_range(start=start_date, end=end_date, freq="W-Mon"):
1313
df = query_zscores(database_connection, target_week=week)
1414
df.to_sql(

0 commit comments

Comments
 (0)