Skip to content

Commit 6accbb7

Browse files
authored
Prep for adding DAU / MAU / IAP Revenue estimates
Prep for adding DAU / MAU / Revenue
2 parents ab750c5 + d7c94a1 commit 6accbb7

28 files changed

+996
-1293
lines changed

backend/api_app/controllers/apps.py

Lines changed: 31 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -201,45 +201,34 @@ def process_country_group(group):
201201
def create_app_plot_dict(app_hist: pd.DataFrame) -> pd.DataFrame:
202202
"""Create plot dicts for the app history from weekly metric rows (no resampling or interpolation is done here)."""
203203
star_cols = ["one_star", "two_star", "three_star", "four_star", "five_star"]
204-
metrics = ["installs", "rating", "review_count", "rating_count", *star_cols]
205-
xaxis_col = "snapshot_date"
204+
cumulative_metrics = ["rating", *star_cols]
205+
weekly_metrics = [
206+
"weekly_installs",
207+
"weekly_ratings",
208+
"weekly_reviews",
209+
"weekly_active_users",
210+
"monthly_active_users",
211+
"weekly_ad_revenue",
212+
"weekly_iap_revenue",
213+
]
214+
xaxis_col = "week_start"
206215
# Convert to datetime and sort
207216
app_hist[xaxis_col] = pd.to_datetime(app_hist[xaxis_col])
208217
app_hist = app_hist.sort_values(xaxis_col)
209-
app_hist = app_hist.set_index(xaxis_col)
210-
# Resample to weekly frequency - this creates missing weeks with NaN
211-
app_hist = app_hist.resample("W").last()
212218

213-
# Replace zeros with NaN for cumulative metrics (zeros are data holes, not valid values)
214-
# Linear interpolation for cumulative metrics (installs, rating_count, review_count, star counts)
215-
cumulative_metrics = ["installs", "rating_count", "review_count", *star_cols]
216-
for metric in cumulative_metrics:
217-
if metric in app_hist.columns:
218-
app_hist[metric].head()
219-
app_hist[metric].dtype
220-
# Replace 0 with NaN (these are data holes, not valid cumulative values)
221-
app_hist[metric] = app_hist[metric].replace(0, np.nan)
222-
# Linear interpolation
223-
app_hist[metric] = app_hist[metric].interpolate(
224-
method="linear", limit_direction="forward"
225-
)
226-
227-
# For rating (average), also replace zeros and interpolate
228-
if "rating" in app_hist.columns:
229-
# Replace 0 with NaN (invalid rating)
230-
app_hist["rating"] = app_hist["rating"].replace(0, np.nan)
231-
# Interpolate
232-
app_hist["rating"] = app_hist["rating"].interpolate(
233-
method="linear", limit_direction="forward"
234-
)
235-
app_hist = app_hist.reset_index()
236-
# Calculate days between snapshots
237-
app_hist["date_change"] = app_hist[xaxis_col] - app_hist[xaxis_col].shift(1)
238-
app_hist["days_changed"] = app_hist["date_change"].apply(
239-
lambda x: np.nan if pd.isna(x) else x.days,
240-
)
241219
metrics_to_add = []
242-
for metric in metrics:
220+
for metric in weekly_metrics:
221+
rate_of_change_metric = f"{metric}_rate_of_change"
222+
avg_per_day_metric = f"{metric}_avg_per_day"
223+
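# Formula: (new / old) * 100 -- NOTE(review): this is the week-over-week ratio, not the percent change ((new - old) / old) * 100 used for the cumulative metrics; confirm which is intended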
224+
app_hist[rate_of_change_metric] = (
225+
app_hist[metric] / app_hist[metric].shift(1)
226+
) * 100
227+
app_hist[avg_per_day_metric] = app_hist[metric] / 7
228+
metrics_to_add.append(rate_of_change_metric)
229+
metrics_to_add.append(avg_per_day_metric)
230+
231+
for metric in cumulative_metrics:
243232
change_metric = f"new_{metric}"
244233
rate_of_change_metric = f"{metric}_rate_of_change"
245234
avg_per_day_metric = f"{metric}_avg_per_day"
@@ -250,14 +239,18 @@ def create_app_plot_dict(app_hist: pd.DataFrame) -> pd.DataFrame:
250239
(app_hist[metric] - app_hist[metric].shift(1)) / app_hist[metric].shift(1)
251240
) * 100
252241
# Avg Per Day (daily average of the change)
253-
app_hist[avg_per_day_metric] = (
254-
app_hist[change_metric] / app_hist["days_changed"]
255-
)
242+
app_hist[avg_per_day_metric] = app_hist[change_metric] / 7
256243
metrics_to_add.append(change_metric)
257244
metrics_to_add.append(rate_of_change_metric)
258245
metrics_to_add.append(avg_per_day_metric)
246+
247+
# Include cumulative/base columns for charts (cumulative_installs, cumulative_ratings, rating, star_cols)
248+
base_cols = ["cumulative_installs", "cumulative_ratings", "rating", *star_cols]
249+
available_base = [c for c in base_cols if c in app_hist.columns]
259250
# Select final columns and drop the first row (no previous data to compare)
260-
app_hist = app_hist[[xaxis_col, *metrics, *metrics_to_add]].drop(app_hist.index[0])
251+
app_hist = app_hist[
252+
[xaxis_col, *weekly_metrics, *available_base, *metrics_to_add]
253+
].drop(app_hist.index[0])
261254
# Replace infinite values with NaN
262255
app_hist = app_hist.replace([np.inf, -np.inf], np.nan)
263256
# Drop columns that are all NaN

backend/api_app/controllers/companies.py

Lines changed: 11 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -95,15 +95,21 @@ def make_company_api_domains_dict(
9595
.reset_index()
9696
)
9797

98-
df["country"] = df["country"].apply(
99-
lambda x: x.tolist() if isinstance(x, np.ndarray) else x
100-
)
98+
# Pandas 3.0 defaults to ArrowStringArray, so convert it to a plain list the same way as np.ndarray
10199
df["org"] = df["org"].apply(
102-
lambda x: x.tolist() if isinstance(x, np.ndarray) else x
100+
lambda x: (
101+
x.tolist()
102+
if isinstance(x, np.ndarray) or isinstance(x, pd.arrays.ArrowStringArray)
103+
else x
104+
)
103105
)
104106

105107
df["country"] = df["country"].apply(
106-
lambda x: x.tolist() if isinstance(x, np.ndarray) else x
108+
lambda x: (
109+
x.tolist()
110+
if isinstance(x, np.ndarray) or isinstance(x, pd.arrays.ArrowStringArray)
111+
else x
112+
)
107113
)
108114

109115
missing_domains = [
@@ -558,16 +564,6 @@ def make_company_stats(df: pd.DataFrame) -> CompanyCategoryOverview:
558564
res_installs_d30["adstxt_reseller_android"],
559565
)
560566

561-
# (
562-
# sdk_ios_rating_count_d30,
563-
# adstxt_direct_ios_rating_count_d30,
564-
# adstxt_reseller_ios_rating_count_d30,
565-
# ) = (
566-
# res_rating_count_d30["sdk_ios"],
567-
# res_rating_count_d30["adstxt_direct_ios"],
568-
# res_rating_count_d30["adstxt_reseller_ios"],
569-
# )
570-
571567
sdk_total_apps = sdk_ios_total_apps + sdk_android_total_apps
572568
total_apps = (
573569
sdk_total_apps

backend/api_app/models.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -316,9 +316,6 @@ class CategoryCompanyStats:
316316
sdk_android_installs_d30: int = 0
317317
adstxt_direct_android_installs_d30: int = 0
318318
adstxt_reseller_android_installs_d30: int = 0
319-
# sdk_ios_rating_count_d30: int = 0
320-
# adstxt_direct_ios_rating_count_d30: int = 0
321-
# adstxt_reseller_ios_rating_count_d30: int = 0
322319

323320

324321
@dataclass

backend/dbcon/queries.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -84,9 +84,7 @@ def get_growth_apps(
8484
)
8585
decimal_cols = [
8686
"installs_z_score_2w",
87-
"ratings_z_score_2w",
8887
"installs_z_score_4w",
89-
"ratings_z_score_4w",
9088
]
9189
df[decimal_cols] = df[decimal_cols].round(2)
9290
return df
@@ -450,7 +448,6 @@ def get_company_stats(
450448
"installs_total",
451449
"installs_d30",
452450
"rating_count_total",
453-
"rating_count_d30",
454451
]
455452
]
456453
.sum()
Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,23 @@
11
SELECT
2-
agmh.snapshot_date,
3-
agmh.installs,
2+
agmh.week_start,
3+
agmh.weekly_installs,
4+
agmh.weekly_ratings,
5+
agmh.weekly_reviews,
6+
agmh.weekly_active_users,
7+
agmh.monthly_active_users,
8+
agmh.weekly_ad_revenue,
9+
agmh.weekly_iap_revenue,
10+
agmh.total_installs AS cumulative_installs,
11+
agmh.total_ratings AS cumulative_ratings,
412
agmh.rating,
5-
agmh.rating_count,
6-
agmh.review_count,
713
agmh.one_star,
814
agmh.two_star,
915
agmh.three_star,
1016
agmh.four_star,
1117
agmh.five_star
12-
FROM app_global_metrics_history AS agmh
13-
inner join store_apps sa on agmh.store_app = sa.id
18+
FROM app_global_metrics_weekly AS agmh
19+
INNER JOIN store_apps AS sa ON agmh.store_app = sa.id
1420
WHERE
1521
sa.store_id = :store_id
16-
AND agmh.snapshot_date >= CURRENT_DATE - INTERVAL '375 days'
17-
ORDER BY agmh.snapshot_date
18-
;
22+
AND agmh.week_start >= CURRENT_DATE - INTERVAL '375 days'
23+
ORDER BY agmh.week_start;

backend/dbcon/sql/query_apps_crossfilter.sql

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -30,10 +30,9 @@ SELECT
3030
sao.id,
3131
sao.store_id,
3232
sao.name,
33-
sao.installs_est AS installs,
33+
sao.installs,
3434
sao.rating_count,
35-
sao.installs_sum_4w_est AS installs_d30,
36-
sao.ratings_sum_4w AS ratings_d30,
35+
sao.installs_sum_4w AS installs_d30,
3736
sao.in_app_purchases,
3837
sao.ad_supported,
3938
sao.store,
@@ -55,12 +54,12 @@ WHERE
5554

5655
:min_installs ::bigint IS NULL
5756
OR :min_installs = 0
58-
OR sao.installs_est >= :min_installs
57+
OR sao.installs >= :min_installs
5958
)
6059
AND (
6160

6261
:max_installs ::bigint IS NULL
63-
OR sao.installs_est <= :max_installs
62+
OR sao.installs <= :max_installs
6463
)
6564
AND (
6665

@@ -75,18 +74,18 @@ WHERE
7574
AND (
7675

7776
:min_installs_d30 ::bigint IS NULL
78-
OR sao.installs_sum_4w_est >= :min_installs_d30
77+
OR sao.installs_sum_4w >= :min_installs_d30
7978
)
8079
AND (
8180

8281
:max_installs_d30 ::bigint IS NULL
83-
OR sao.installs_sum_4w_est <= :max_installs_d30
82+
OR sao.installs_sum_4w <= :max_installs_d30
8483
)
8584
-- Exclusion check
8685
AND NOT EXISTS (
8786
SELECT 1 FROM exclude_apps AS ea
8887
WHERE ea.store_app = sao.id
8988
)
9089
ORDER BY
91-
sao.installs_est DESC
90+
sao.installs DESC
9291
LIMIT 100;

backend/dbcon/sql/query_company_secondary_top_apps.sql

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@ SELECT
1313
ra.name,
1414
ra.store_id,
1515
ra.app_company_rank AS rank,
16-
ra.rating_count_d30,
1716
ra.installs_d30,
1817
ra.sdk,
1918
ra.api_call,

backend/dbcon/sql/query_company_secondary_top_apps_category.sql

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@ SELECT
1414
ra.name,
1515
ra.store_id,
1616
ra.app_company_category_rank AS rank,
17-
ra.rating_count_d30,
1817
ra.installs_d30,
1918
ra.sdk,
2019
ra.api_call,

backend/dbcon/sql/query_company_top_apps.sql

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@ SELECT
1515
ra.developer_name,
1616
ra.icon_url_100,
1717
ra.app_company_rank AS rank,
18-
ra.rating_count_d30,
1918
ra.installs_d30,
2019
ra.sdk,
2120
ra.api_call,

backend/dbcon/sql/query_company_top_apps_category.sql

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@ SELECT
1616
ra.developer_name,
1717
ra.icon_url_100,
1818
ra.app_company_category_rank AS rank,
19-
ra.rating_count_d30,
2019
ra.installs_d30,
2120
ra.sdk,
2221
ra.api_call,

0 commit comments

Comments
 (0)