entropyx
diff --git a/‎.gitignore‎
Lines changed: 1 addition & 1 deletion b/‎.gitignore‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎Dockerfile‎
Lines changed: 2 additions & 1 deletion b/‎Dockerfile‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎Murray/main.py‎
Lines changed: 362 additions & 200 deletions b/‎Murray/main.py‎
Lines changed: 362 additions & 200 deletions
diff --git a/‎Murray/plots.py‎
Lines changed: 42 additions & 16 deletions b/‎Murray/plots.py‎
Lines changed: 42 additions & 16 deletions
diff --git a/‎Murray/post_analysis.py‎
Lines changed: 142 additions & 0 deletions b/‎Murray/post_analysis.py‎
Lines changed: 142 additions & 0 deletions
@@ -22,4 +22,4 @@ logs/
 specs/
 ai_docs/
 .claude/
-
+*.json
@@ -11,7 +11,8 @@ RUN apt-get update && apt-get install -y \
 
 # Copy and install Python dependencies
 COPY requirements.txt .
-RUN pip install -r requirements.txt
+RUN pip install --no-cache-dir --upgrade pip && \
+    pip install --no-cache-dir -r requirements.txt
 
 # Copy the rest of the application
 COPY . .
 
@@ -251,6 +251,13 @@ def plot_mde_results(results_by_size, sensitivity_results, periods):
     """
     Generates an interactive heatmap showing penalized MDE values that account for
     counterfactual quality and time period.
+    Args:
+        results_by_size: Dictionary containing simulation results
+        sensitivity_results: Dictionary containing sensitivity results
+        periods: List of periods to evaluate
+
+    Returns:
+        fig: Interactive heatmap figure
     """
     holdout_by_location = {
         size: data["Holdout Percentage"] for size, data in results_by_size.items()
@@ -262,39 +269,54 @@ def plot_mde_results(results_by_size, sensitivity_results, periods):
 
     def calculate_penalty_score(mde, period_idx, total_periods, size, results_by_size):
         """
-        Calculates a penalty score based on MDE, counterfactual quality, and time period.
-        Returns both the score and its components for hover information.
+        Calculates a combined score (higher is better) from MDE, counterfactual quality (MAPE, SMAPE),
+        p-value, statistical power, and time period; lower error/p-value/MDE and longer periods score higher.
+        Returns (score, mde, mape, smape, p_value, power, time_score), or seven Nones when mde is missing.
         """
         if pd.isna(mde):
-            return None, None, None, None
+            return None, None, None, None, None, None, None
 
         # Quality metrics
         mape = results_by_size[size].get("MAPE", 0)
         smape = results_by_size[size].get("SMAPE", 0)
 
+        # Statistical metrics; a missing value falls back to the worst case (p=1, power=0)
+        p_value = results_by_size[size].get("p_value", 1.0)
+        power = results_by_size[size].get("power", 0.0)
+
         # Normalize metrics
         mape_factor = min(mape / 100, 1)
         smape_factor = min(smape / 100, 1)
         quality_score = (mape_factor + smape_factor) / 2
 
-        # Time factor
-        time_score = (period_idx + 1) / total_periods
+        # Normalize p-value (lower is better)
+        p_value_score = 1 - min(p_value, 1)
+
+        # Normalize power (higher is better)
+        power_score = min(power, 1)
 
         # MDE factor
         mde_factor = min(mde, 1)
 
-        # Calculate final score
-        quality_weight = 0.85
-        time_weight = 0.05
-        mde_weight = 0.15
+        # Time factor - longer periods are better
+        time_score = (period_idx + 1) / total_periods
+
+        # Calculate final score: weights sum to 1 and every term is oriented so higher is better
+        quality_weight = 0.20
+        p_value_weight = 0.15
+        power_weight = 0.55
+        mde_weight = 0.09
+        time_weight = 0.01
 
         final_score = (
-            quality_weight * quality_score
-            + time_weight * (1 - time_score)
-            + mde_weight * mde_factor
+            quality_weight * (1 - quality_score)  # quality_score is an error measure, so invert it
+            + p_value_weight * p_value_score
+            + power_weight * power_score
+            + mde_weight * (1 - mde_factor)
+            + time_weight * time_score  # reward longer periods, as documented above
         )
 
-        return final_score, mde, mape, smape
+        return final_score, mde, mape, smape, p_value, power, time_score
 
     heatmap_data = pd.DataFrame()
     hover_data = []
@@ -306,17 +328,18 @@ def calculate_penalty_score(mde, period_idx, total_periods, size, results_by_siz
 
         for period_idx, period in enumerate(periods):
             mde = period_results.get(period, {}).get("MDE", None)
-            score, original_mde, mape, smape = calculate_penalty_score(
+            score, original_mde, mape, smape, p_value, power, time_score = calculate_penalty_score(
                 mde, period_idx, len(periods), size, results_by_size
             )
             row.append(score)
             hover_row.append(
                 {
-                    "Original MDE": (
-                        f"{original_mde:.2%}" if original_mde is not None else "N/A"
-                    ),
+                    "MDE": f"{original_mde:.2%}" if original_mde is not None else "N/A",
                     "MAPE": f"{mape:.2f}%" if mape is not None else "N/A",
                     "SMAPE": f"{smape:.2f}%" if smape is not None else "N/A",
+                    "P-Value": f"{p_value:.4f}" if p_value is not None else "N/A",
+                    "Statistical Power": f"{power:.2%}" if power is not None else "N/A",
+                    "Period Score": f"{time_score*100:.0f}%" if time_score is not None else "N/A"  
                 }
             )
         heatmap_data[size] = row
@@ -374,10 +397,13 @@ def calculate_penalty_score(mde, period_idx, total_periods, size, results_by_siz
             textfont={"size": 12, "color": "black"},
             hovertemplate=(
                 "Treatment size: %{customdata}<br>"
-                + "Penalty Score: %{text}<br>"
-                + "Original MDE: %{customdata:Original MDE}<br>"
+                + "Combined Score: %{text}<br>"
+                + "MDE: %{customdata:MDE}<br>"
                 + "MAPE: %{customdata:MAPE}<br>"
                 + "SMAPE: %{customdata:SMAPE}<br>"
+                + "P-Value: %{customdata:P-Value}<br>"
+                + "Statistical Power: %{customdata:Statistical Power}<br>"
+                + "Period Score: %{customdata:Period Score}<br>"
                 + "<extra></extra>"
             ),
             showscale=True,
 
@@ -2,6 +2,7 @@
 from sklearn.preprocessing import MinMaxScaler
 from Murray.main import select_controls, SyntheticControl
 from Murray.auxiliary import market_correlations, handle_duplicates
+from Murray.plots import calculate_confidence_bands, calculate_optimal_noise_scale
 import pandas as pd
 from logger_config import get_logger
 
@@ -215,3 +216,144 @@ def stat_func(x):
 
     logger.info("run_geo_evaluation completed successfully")
     return results_evaluation
+
+
+def get_evaluation_chart_data(
+    data_input,
+    start_treatment,
+    end_treatment,
+    treatment_group,
+    significance_level=0.05,
+):
+    """
+    Extract only the data needed for plotting charts from evaluation results.
+
+    Args:
+        data_input: Input dataframe; its "location" and "time" columns are read here
+        start_treatment: Treatment start date (parsed day-first); must exist in the data
+        end_treatment: Treatment end date
+        treatment_group: List of treatment locations
+        significance_level: Significance level for confidence bands
+
+    Returns:
+        dict: Dictionary containing all data needed for chart plotting
+
+    Raises:
+        ValueError: If start_treatment matches no date of the reference location
+    """
+    logger.info("Starting get_evaluation_chart_data")
+
+    # First run the evaluation to get base results
+    results = run_geo_evaluation(
+        data_input, start_treatment, end_treatment, treatment_group, spend=0
+    )
+
+    # Extract base values
+    treatment = results["treatment"]
+    counterfactual = results["counterfactual"]
+    period = results["period"]
+    length_treatment = results["length_treatment"]
+
+    # Date axis comes from the first location (assumes all locations share dates - confirm)
+    reference_location = data_input["location"].unique()[0]
+    filtered_data = data_input[data_input["location"] == reference_location].copy()
+    filtered_data["time"] = pd.to_datetime(filtered_data["time"])
+    dates = filtered_data["time"].dt.date.astype(str).tolist()
+
+    # Calculate treatment start position
+    start_treatment = pd.to_datetime(start_treatment, dayfirst=True)
+    is_start = filtered_data["time"].dt.date == start_treatment.date()
+    if not is_start.any():
+        # idxmax() on an all-False mask would silently return the first row
+        raise ValueError(f"start_treatment {start_treatment.date()} not found in data")
+    start_position_treatment = filtered_data.index.get_loc(is_start.idxmax())
+
+    # Calculate derived series
+    point_difference = treatment - counterfactual
+    cumulative_effect = ([0] * (len(treatment) - period)) + (
+        np.cumsum(point_difference[len(treatment) - period:])
+    ).tolist()
+
+    # Extract treatment period data
+    y_treatment = treatment[start_position_treatment:]
+    point_difference_treatment = point_difference[start_position_treatment:]
+    cumulative_effect_treatment = cumulative_effect[start_position_treatment:]
+
+    # Calculate confidence bands
+    ci = 1 - significance_level
+    # NOTE(review): y_treatment is sliced but counterfactual is full length here - confirm
+    noise_scale = calculate_optimal_noise_scale(y_treatment, counterfactual)
+
+    lower_bound, upper_bound = calculate_confidence_bands(
+        y_treatment, noise_scale=noise_scale, ci=ci
+    )
+    lower_bound_pd, upper_bound_pd = calculate_confidence_bands(
+        point_difference_treatment, ci=ci
+    )
+    lower_bound_ce, upper_bound_ce = calculate_confidence_bands(
+        cumulative_effect_treatment, ci=ci
+    )
+
+    # NOTE(review): the mean difference is divided by length_treatment again - confirm intended
+    att = np.mean(point_difference_treatment) / length_treatment
+    incremental = np.sum(point_difference_treatment)
+
+    # Pre-treatment window; clamp so a short history cannot wrap to a negative index
+    pre_start = max(start_position_treatment - period, 0)
+    pre_treatment = treatment[pre_start:start_position_treatment]
+    pre_counterfactual = counterfactual[pre_start:start_position_treatment]
+
+    chart_data = {
+        # Base series
+        "dates": dates,
+        "treatment": treatment.tolist(),
+        "counterfactual": counterfactual.tolist(),
+        "point_difference": point_difference.tolist(),
+        "cumulative_effect": cumulative_effect,
+
+        # Treatment period data
+        "treatment_dates": dates[start_position_treatment:],
+        "y_treatment": y_treatment.tolist(),
+        "point_difference_treatment": point_difference_treatment.tolist(),
+        "cumulative_effect_treatment": cumulative_effect_treatment,
+
+        # Confidence bands
+        "lower_bound": lower_bound.tolist(),
+        "upper_bound": upper_bound.tolist(),
+        "lower_bound_pd": lower_bound_pd.tolist(),
+        "upper_bound_pd": upper_bound_pd.tolist(),
+        "lower_bound_ce": lower_bound_ce.tolist(),
+        "upper_bound_ce": upper_bound_ce.tolist(),
+
+        # Aggregate values (sums over the treatment period)
+        "lower_bound_value": float(np.sum(lower_bound)),
+        "upper_bound_value": float(np.sum(upper_bound)),
+        "prediction_value": float(np.sum(y_treatment)),
+        "att": float(att),
+        "incremental": float(incremental),
+
+        # Pre/post treatment periods
+        "pre_treatment": pre_treatment.tolist(),
+        "pre_counterfactual": pre_counterfactual.tolist(),
+        "post_treatment": y_treatment.tolist(),
+        "post_counterfactual": counterfactual[start_position_treatment:].tolist(),
+
+        # Metadata
+        "start_position_treatment": start_position_treatment,
+        "period": period,
+        "length_treatment": length_treatment,
+
+        # Include key metrics from original evaluation
+        "p_value": results["p_value"],
+        "power": results["power"],
+        "percenge_lift": results["percenge_lift"],
+        "MAPE": results["MAPE"],
+        "SMAPE": results["SMAPE"],
+        "observed_stat": results["observed_stat"],
+        "null_stats": results["null_stats"].tolist(),
+        "control_group": results["control_group"],
+        "weights": results["weights"],
+    }
+
+    logger.info("get_evaluation_chart_data completed successfully")
+    return chart_data