@@ -201,45 +201,34 @@ def process_country_group(group):
201201def create_app_plot_dict (app_hist : pd .DataFrame ) -> pd .DataFrame :
202202 """Create plot dicts for the app history with linear interpolation for missing weeks."""
203203 star_cols = ["one_star" , "two_star" , "three_star" , "four_star" , "five_star" ]
204- metrics = ["installs" , "rating" , "review_count" , "rating_count" , * star_cols ]
205- xaxis_col = "snapshot_date"
204+ cumulative_metrics = ["rating" , * star_cols ]
205+ weekly_metrics = [
206+ "weekly_installs" ,
207+ "weekly_ratings" ,
208+ "weekly_reviews" ,
209+ "weekly_active_users" ,
210+ "monthly_active_users" ,
211+ "weekly_ad_revenue" ,
212+ "weekly_iap_revenue" ,
213+ ]
214+ xaxis_col = "week_start"
206215 # Convert to datetime and sort
207216 app_hist [xaxis_col ] = pd .to_datetime (app_hist [xaxis_col ])
208217 app_hist = app_hist .sort_values (xaxis_col )
209- app_hist = app_hist .set_index (xaxis_col )
210- # Resample to weekly frequency - this creates missing weeks with NaN
211- app_hist = app_hist .resample ("W" ).last ()
212218
213- # Replace zeros with NaN for cumulative metrics (zeros are data holes, not valid values)
214- # Linear interpolation for cumulative metrics (installs, rating_count, review_count, star counts)
215- cumulative_metrics = ["installs" , "rating_count" , "review_count" , * star_cols ]
216- for metric in cumulative_metrics :
217- if metric in app_hist .columns :
218- app_hist [metric ].head ()
219- app_hist [metric ].dtype
220- # Replace 0 with NaN (these are data holes, not valid cumulative values)
221- app_hist [metric ] = app_hist [metric ].replace (0 , np .nan )
222- # Linear interpolation
223- app_hist [metric ] = app_hist [metric ].interpolate (
224- method = "linear" , limit_direction = "forward"
225- )
226-
227- # For rating (average), also replace zeros and interpolate
228- if "rating" in app_hist .columns :
229- # Replace 0 with NaN (invalid rating)
230- app_hist ["rating" ] = app_hist ["rating" ].replace (0 , np .nan )
231- # Interpolate
232- app_hist ["rating" ] = app_hist ["rating" ].interpolate (
233- method = "linear" , limit_direction = "forward"
234- )
235- app_hist = app_hist .reset_index ()
236- # Calculate days between snapshots
237- app_hist ["date_change" ] = app_hist [xaxis_col ] - app_hist [xaxis_col ].shift (1 )
238- app_hist ["days_changed" ] = app_hist ["date_change" ].apply (
239- lambda x : np .nan if pd .isna (x ) else x .days ,
240- )
241219 metrics_to_add = []
242- for metric in metrics :
220+ for metric in weekly_metrics :
221+ rate_of_change_metric = f"{ metric } _rate_of_change"
222+ avg_per_day_metric = f"{ metric } _avg_per_day"
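223+ # Intended formula: ((new - old) / old) * 100. NOTE(review): the expression below computes (new / old) * 100 (no "- old" term), unlike the cumulative-metric loop further down - confirm which is wanted.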
224+ app_hist [rate_of_change_metric ] = (
225+ app_hist [metric ] / app_hist [metric ].shift (1 )
226+ ) * 100
227+ app_hist [avg_per_day_metric ] = app_hist [metric ] / 7
228+ metrics_to_add .append (rate_of_change_metric )
229+ metrics_to_add .append (avg_per_day_metric )
230+
231+ for metric in cumulative_metrics :
243232 change_metric = f"new_{ metric } "
244233 rate_of_change_metric = f"{ metric } _rate_of_change"
245234 avg_per_day_metric = f"{ metric } _avg_per_day"
@@ -250,14 +239,18 @@ def create_app_plot_dict(app_hist: pd.DataFrame) -> pd.DataFrame:
250239 (app_hist [metric ] - app_hist [metric ].shift (1 )) / app_hist [metric ].shift (1 )
251240 ) * 100
252241 # Avg Per Day (daily average of the change)
253- app_hist [avg_per_day_metric ] = (
254- app_hist [change_metric ] / app_hist ["days_changed" ]
255- )
242+ app_hist [avg_per_day_metric ] = app_hist [change_metric ] / 7
256243 metrics_to_add .append (change_metric )
257244 metrics_to_add .append (rate_of_change_metric )
258245 metrics_to_add .append (avg_per_day_metric )
246+
247+ # Include cumulative/base columns for charts (cumulative_installs, cumulative_ratings, rating, star_cols)
248+ base_cols = ["cumulative_installs" , "cumulative_ratings" , "rating" , * star_cols ]
249+ available_base = [c for c in base_cols if c in app_hist .columns ]
259250 # Select final columns and drop the first row (no previous data to compare)
260- app_hist = app_hist [[xaxis_col , * metrics , * metrics_to_add ]].drop (app_hist .index [0 ])
251+ app_hist = app_hist [
252+ [xaxis_col , * weekly_metrics , * available_base , * metrics_to_add ]
253+ ].drop (app_hist .index [0 ])
261254 # Replace infinite values with NaN
262255 app_hist = app_hist .replace ([np .inf , - np .inf ], np .nan )
263256 # Drop columns that are all NaN
0 commit comments