Commit fa6e40b
Pre-v0.11.0 PR - Add Kriging Model & update diagnostic testing (#10)
* Add Kriging mean model and update PoD pipeline

  Introduce a Gaussian Process (Kriging) as an alternative to polynomial mean models and wire it through the PoD pipeline and UI.

  - src/digiqual/pod.py: implement fit_robust_mean_model to evaluate both polynomial degrees and a GaussianProcessRegressor via cross-validation, expose model_type_ and model_params_ on the returned model, and update bootstrap_pod_ci to accept model_type/model_params and handle Kriging (disable the optimizer during bootstrap).
  - src/digiqual/core.py: adjust logging/messages and pass the new model_type/model_params into bootstrap_pod_ci.
  - app/app.py: update UI metrics to display the chosen Mean Model and Error Distribution string.
  - tests/test_pod.py: reflect dynamic model selection, add a Kriging-specific bootstrap test, and adjust assertions/fixtures accordingly.

  Overall this enables dynamic mean-model selection (Polynomial vs Kriging) and keeps bootstrap and UI behavior consistent with the new models.

* Add model-selection plot, refinement UI, and diagnostics tweaks

  Add a targeted-sampling UI with download, introduce a model-selection visualization, improve bootstrap diagnostics, and add example scripts.

  Key changes:
  - app/app.py: add a handler for the refinement button that calls Study.refine(), store the generated samples, and provide a CSV download UI; add a plot_model_selection render output and reorganize the results layout.
  - app/run_app.py: fix the JS filename-map key to match the new download id ('download_new_samples').
  - src/digiqual/pod.py: attach cv_scores_ to the fitted mean model, remove the plot_cv flag, and add plot_model_selection() to render a normalized bias-variance bar chart plus an MSE table.
  - src/digiqual/core.py: save/show the new model_selection plot when available in SimulationStudy outputs.
  - src/digiqual/diagnostics.py: revise the bootstrap convergence routine (use a degree-2 polynomial, use relative standard deviation / CV, and average + max thresholds) and update the sample_sufficiency report to include the thresholds and both avg/max CV metrics.
  - scripts/kriging_run.py: new example script that generates non-linear (sigmoid) data and runs a PoD analysis with SimulationStudy.
  - scripts/make_fake_data.py: modify the fake-data generators to create deliberate gap/heteroskedastic examples, adjust filenames and sizes for testing, and replace normal noise with gamma noise.
  - tests/test_pod.py: update imports and add a test that plot_model_selection produces a figure and that the fitted model has cv_scores_.

  Rationale: provide a visual model-selection tool for the bias-variance tradeoff, expose refinement/download of newly generated targeted samples in the UI, and make bootstrap convergence checks more robust to heteroskedasticity and tail behavior. Also add example scripts for Kriging and improved synthetic data for diagnostics testing.
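The fit_robust_mean_model changes in src/digiqual/pod.py are only summarized above, not shown in the diffs below. A minimal sketch of the cross-validated choice between polynomial degrees and a Gaussian Process that the message describes might look like the following; the helper name `select_mean_model`, the candidate degrees, and the RBF+White kernel are illustrative assumptions, not the actual digiqual API:

```python
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, WhiteKernel
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import PolynomialFeatures

def select_mean_model(X, y, degrees=(1, 2, 3)):
    """Pick the mean model with the lowest cross-validated MSE.

    Hypothetical helper: compares polynomial fits of several degrees
    against a GP (Kriging) candidate and returns (model_type,
    model_params, cv_scores), mirroring the model_type_/model_params_
    attributes described in the commit message.
    """
    X2d = np.asarray(X).reshape(-1, 1)
    scores = {}
    for d in degrees:
        poly = make_pipeline(PolynomialFeatures(degree=d), LinearRegression())
        # cross_val_score returns negative MSE; negate to get MSE
        scores[("Polynomial", d)] = -cross_val_score(
            poly, X2d, y, cv=5, scoring="neg_mean_squared_error").mean()
    gp = GaussianProcessRegressor(kernel=RBF() + WhiteKernel(), normalize_y=True)
    scores[("Kriging", None)] = -cross_val_score(
        gp, X2d, y, cv=5, scoring="neg_mean_squared_error").mean()
    (model_type, model_params), _ = min(scores.items(), key=lambda kv: kv[1])
    return model_type, model_params, scores
```

The per-candidate CV scores returned here are the kind of data the new plot_model_selection() bar chart would consume.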
1 parent 3ec7c97 commit fa6e40b

File tree

8 files changed

+382
-237
lines changed

app/app.py

Lines changed: 73 additions & 10 deletions
@@ -477,6 +477,24 @@ def initialize_column_selectors():
         ui.update_selectize("input_cols", choices=cols, selected=default_inputs)
         ui.update_selectize("outcome_col", choices=cols, selected=default_outcome)

+    @reactive.effect
+    @reactive.event(input.btn_refine)
+    def handle_refinement():
+        study = current_study()
+        if study is None:
+            return
+
+        try:
+            # We assume your SimulationStudy has a refine method
+            # that targets the 'Length' gaps we discussed earlier
+            n_to_gen = input.n_new_samples()
+            refined_df = study.refine(n_points=n_to_gen)  # Or your specific generation logic
+
+            new_samples.set(refined_df)
+            ui.notification_show(f"Generated {n_to_gen} targeted samples.", type="message")
+        except Exception as e:
+            ui.notification_show(f"Refinement failed: {e}", type="error")
+
     @render.ui
     def selection_error_display():
         """Displays a permanent red error if selections conflict."""
@@ -605,6 +623,29 @@ def remediation_ui():
         )

+    @render.ui
+    def download_new_samples_ui():
+        # Only show the button if new_samples has been populated
+        if new_samples() is None:
+            return None
+
+        return ui.div(
+            ui.hr(),
+            ui.p("Success! Download your targeted samples below:", class_="small"),
+            ui.download_button(
+                "download_new_samples",
+                "Download Refined CSV",
+                class_="btn-success w-100",
+                icon=icon_svg("download")
+            )
+        )
+
+    @render.download(filename="remediation_samples.csv")
+    def download_new_samples():
+        df = new_samples()
+        if df is not None:
+            yield df.to_csv(index=False)
+

     #### Server - PoD Generation (Tab 4) ####

@@ -709,19 +750,25 @@ def compute_pod_analysis():
             val = results["a90_95"]
             a9095_str = f"{val:.3f}" if not np.isnan(val) else "Not Reached"

+            # 3. Format the Mean Model string based on the new architecture
+            mean_model = results["mean_model"]
+            if mean_model.model_type_ == 'Polynomial':
+                model_str = f"Polynomial (Degree {mean_model.model_params_})"
+            else:
+                model_str = "Kriging (Gaussian Process)"

-
-            # 3. Create Metrics Dictionary for the UI
+            # 4. Create Metrics Dictionary for the UI
            metrics = {
                "Parameter of Interest": results["poi_col"],
                "Threshold": results["threshold"],
                "a90/95": a9095_str,
-               "Model Degree": results["mean_model"].best_degree_,
+               "Mean Model": model_str,
                "Smoothing Bandwidth": f"{results['bandwidth']:.4f}",
+               "Error Distribution": results["dist_info"][0].capitalize()
            }
            pod_metrics.set(metrics)

-           # 4. Prepare Data for Download
+           # 5. Prepare Data for Download
            export_df = pd.DataFrame({
                "x_defect_size": results["X_eval"],
                "pod_mean": results["curves"]["pod"],
@@ -730,25 +777,32 @@ def compute_pod_analysis():
            })
            pod_export_data.set(export_df)

-           # 5. Generate Plots (Visualise draws them internally)
+           # 6. Generate Plots (Visualise draws them internally)
            study.visualise(show=False)
            plot_trigger.set(plot_trigger() + 1)

        except Exception as e:
            ui.notification_show(f"Analysis Failed: {str(e)}", type="error")


-    # --- RESULTS DISPLAY ---
+    # --- RESULTS DISPLAY ---
     @render.ui
     def pod_results_container():
         """
-        Renders the side-by-side plots and the metrics table.
+        Renders the model selection plot, side-by-side analysis plots, and the metrics table.
         """
         if pod_metrics() is None:
             return ui.div()

         return ui.div(
-            # Row 1: Plots
+            # Row 1: Model Selection Plot (Full Width)
+            ui.card(
+                ui.card_header("Model Selection (Bias-Variance Tradeoff)"),
+                ui.output_plot("plot_model_selection", height="400px"),
+                full_screen=True,
+                class_="mb-3"
+            ),
+            # Row 2: Signal Model and PoD Plots
             ui.layout_columns(
                 ui.card(
                     ui.card_header("Signal Model Fit"),
@@ -760,9 +814,10 @@ def pod_results_container():
                     ui.output_plot("plot_curve"),
                     full_screen=True
                 ),
-                col_widths=[6, 6]
+                col_widths=[6, 6],
+                class_="mb-3"
             ),
-            # Row 2: Table and Download Actions
+            # Row 3: Table and Download Actions
             ui.layout_columns(
                 ui.card(
                     ui.card_header("Key Reliability Metrics"),
@@ -780,6 +835,14 @@ def pod_results_container():
             )
         )

+    @render.plot
+    def plot_model_selection():
+        _ = plot_trigger()  # Dependency on button click
+        study = current_study()
+        if study and "model_selection" in study.plots:
+            return study.plots["model_selection"]
+        return None
+
     @render.plot
     def plot_signal():
         _ = plot_trigger()  # Dependency on button click

app/run_app.py

Lines changed: 1 addition & 1 deletion
@@ -78,7 +78,7 @@ def inject_js(window):
     // Map Button IDs to Filenames
     const filenameMap = {
         'download_lhs': 'experimental_design.csv',
-        'download_new_samples_csv': 'refinement_samples.csv',
+        'download_new_samples': 'refinement_samples.csv',
         'download_pod_results': 'pod_analysis_results.csv'
     };

scripts/kriging_run.py

Lines changed: 36 additions & 0 deletions
@@ -0,0 +1,36 @@
+import numpy as np
+import pandas as pd
+from digiqual.core import SimulationStudy
+
+print("Generating synthetic non-linear data...")
+# 1. Generate Non-linear Data (Sigmoid Curve)
+# This shape is difficult for polynomials but perfect for Kriging.
+np.random.seed(42)
+flaw_sizes = np.linspace(0.1, 10.0, 150)
+
+# Sigmoid function: plateaus at the top and bottom
+true_responses = 20 / (1 + np.exp(-1.5 * (flaw_sizes - 5)))
+# Add noise that scales slightly with the flaw size
+noise = np.random.normal(0, 1.0 + 0.1 * flaw_sizes, size=len(flaw_sizes))
+responses = true_responses + noise
+
+df = pd.DataFrame({
+    'Flaw_Size': flaw_sizes,
+    'Response': responses
+})
+
+# 3. Initialize the Study
+print("Initializing SimulationStudy...")
+study = SimulationStudy(input_cols=['Flaw_Size'], outcome_col='Response')
+study.add_data(df)
+study.diagnose()
+
+# 4. Run the PoD Analysis
+# We use a threshold that intersects the middle of our S-Curve (e.g., 10.0)
+# Using 100 bootstrap iterations so it runs relatively quickly for testing
+print("\n--- Running PoD Analysis ---")
+results = study.pod(poi_col='Flaw_Size', threshold=10.0, n_boot=100)
+
+# 5. Show the Final Visualizations
+print("\n--- Generating Visualizations ---")
+study.visualise()

scripts/make_fake_data.py

Lines changed: 20 additions & 26 deletions
@@ -1,48 +1,42 @@
 import pandas as pd
 import numpy as np

-def generate_fake_data(filename="initial_data.csv", n=50):
-    """Generates a small dataset that might FAIL diagnostics (for testing the 'Fix' loop)."""
-    np.random.seed(42)
-
-    # 1. Generate Inputs (Small N = likely gaps)
+def generate_fake_data(filename="app/initial_data.csv", n=25):
+    """Fails due to massive Gaps and Skewed Heteroskedasticity."""
+    # 1. Deliberate Gap (0-2 and 8-10)
+    lengths = np.concatenate([np.random.uniform(0, 2, 12), np.random.uniform(8, 10, 13)])
     df = pd.DataFrame({
-        'Length': np.random.uniform(0, 10, n),
+        'Length': lengths,
         'Angle': np.random.uniform(-45, 45, n)
     })

-    # 2. Physics & Noise
-    base_signal = (df['Length'] * 2.0) - (0.1 * df['Angle'].abs())
-    noise_scale = 0.5 + (0.1 * df['Length'])
-    noise = np.random.normal(loc=0, scale=noise_scale, size=n)
+    # 2. Monotonic Physics + Skewed Gamma Noise
+    # As Length increases, the 'scale' of the Gamma noise increases (Heteroskedasticity)
+    base_signal = 10.0 + 1.5 * df['Length'] + 0.2 * (df['Length']**2)

-    df['Signal'] = np.abs(base_signal + noise)
+    # Non-normal noise: Gamma distribution is always positive and skewed
+    noise_scale = 0.5 + (0.8 * df['Length'])
+    noise = np.random.gamma(shape=2.0, scale=noise_scale, size=n)

+    df['Signal'] = base_signal + noise
     df.to_csv(filename, index=False)
-    print(f"✅ Created '{filename}' with {n} rows (likely to have issues).")
-
+    print(f"✅ Created '{filename}' (N={n}). Should fail Gap and Bootstrap.")

-def updated_data(filename="sufficient_data.csv", n=200):
-    """Generates a large dataset that should PASS all diagnostics."""
-    np.random.seed(999)  # Different seed
-
-    # 1. Generate Inputs (Large N = good coverage)
+def updated_data(filename="app/sufficient_data.csv", n=1500):
+    """Passes because high N overcomes the skewed noise."""
     df = pd.DataFrame({
         'Length': np.random.uniform(0, 10, n),
         'Angle': np.random.uniform(-45, 45, n)
     })

-    # 2. Physics & Noise
-    base_signal = (df['Length'] * 2.0) - (0.1 * df['Angle'].abs())
-    noise_scale = 0.5 + (0.1 * df['Length'])
-    noise = np.random.normal(loc=0, scale=noise_scale, size=n)
-
-    df['Signal'] = np.abs(base_signal + noise)
+    base_signal = 10.0 + 1.5 * df['Length'] + 0.2 * (df['Length']**2)
+    noise_scale = 0.5 + (0.8 * df['Length'])
+    noise = np.random.gamma(shape=2.0, scale=noise_scale, size=n)

+    df['Signal'] = base_signal + noise
     df.to_csv(filename, index=False)
-    print(f"✅ Created '{filename}' with {n} rows (should pass checks).")
+    print(f"✅ Created '{filename}' (N={n}). Should pass all tests.")

 if __name__ == "__main__":
-    # You can comment out the one you don't want, or run both
     generate_fake_data()
     updated_data()
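The gamma-noise datasets above are built to exercise the revised bootstrap convergence routine in src/digiqual/diagnostics.py, whose diff is not shown on this page. As a rough sketch of the relative-std-dev (coefficient-of-variation) criterion with average and max thresholds that the commit message describes — the function name and the threshold values here are illustrative assumptions:

```python
import numpy as np

def bootstrap_convergence_cv(boot_curves, avg_thresh=0.05, max_thresh=0.15):
    """Hypothetical CV-based convergence check.

    boot_curves: array of shape (n_boot, n_eval) holding bootstrap
    replicates of a curve evaluated on a common grid. The check passes
    when the average CV across the grid is below avg_thresh AND the
    worst-point CV is below max_thresh, which is more robust to
    heteroskedastic, heavy-tailed noise than an absolute-std criterion.
    """
    mean = boot_curves.mean(axis=0)
    std = boot_curves.std(axis=0)
    # Relative std dev (CV); guard against division by ~zero means
    with np.errstate(divide="ignore", invalid="ignore"):
        cv = np.where(np.abs(mean) > 1e-12, std / np.abs(mean), 0.0)
    return {
        "avg_cv": float(cv.mean()),
        "max_cv": float(cv.max()),
        "converged": bool(cv.mean() < avg_thresh and cv.max() < max_thresh),
    }
```

Reporting both avg_cv and max_cv matches the updated sample_sufficiency report, which now exposes the thresholds alongside both metrics.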

src/digiqual/core.py

Lines changed: 13 additions & 4 deletions
@@ -267,7 +267,7 @@ def optimise(
         self.data = pd.DataFrame()  # Clear old state to avoid duplication
         self.add_data(final_data)

-    #### PoD Analysis ####
+    #### PoD Analysis ####
     def pod(
         self,
         poi_col: str,
@@ -308,10 +308,13 @@ def pod(
         X = self.clean_data[poi_col].values
         y = self.clean_data[self.outcome].values

-        # 2. Fit Mean Model (Robust Polynomial)
+        # 2. Fit Mean Model (Robust Regression)
         print("1. Selecting Mean Model (Cross-Validation)...")
         mean_model = pod.fit_robust_mean_model(X, y)
-        print(f" -> Selected Polynomial Degree: {mean_model.best_degree_}")
+        if mean_model.model_type_ == 'Polynomial':
+            print(f"-> Selected Model: Polynomial (Degree {mean_model.model_params_})")
+        else:
+            print("-> Selected Model: Kriging (Gaussian Process)")

         # 3. Fit Variance Model & Generate Grid
         print("2. Fitting Variance Model (Kernel Smoothing)...")
@@ -335,7 +338,7 @@ def pod(
         print(f"5. Running Bootstrap ({n_boot} iterations)...")
         lower_ci, upper_ci = pod.bootstrap_pod_ci(
             X, y, X_eval, threshold,
-            mean_model.best_degree_, bandwidth, (dist_name, dist_params),
+            mean_model.model_type_, mean_model.model_params_, bandwidth, (dist_name, dist_params),
             n_boot=n_boot
         )

@@ -397,6 +400,10 @@ def visualise(self, show: bool = True, save_path: str = None) -> None:
             res["X"], res["residuals"], res["X_eval"], res["bandwidth"]
         )

+        # 0. Model Selection Plot (NEW)
+        if hasattr(res["mean_model"], "cv_scores_"):
+            self.plots["model_selection"] = pod.plot_model_selection(res["mean_model"].cv_scores_)
+
         # 1. Signal Model Plot
         self.plots["signal_model"] = plot_signal_model(
             X=res["X"],
@@ -420,6 +427,8 @@ def visualise(self, show: bool = True, save_path: str = None) -> None:

         # Handle Saving
         if save_path:
+            if "model_selection" in self.plots:
+                self.plots["model_selection"].savefig(f"{save_path}_model_selection.png")
             self.plots["signal_model"].get_figure().savefig(f"{save_path}_signal.png")
             self.plots["pod_curve"].get_figure().savefig(f"{save_path}_pod.png")
             print(f"Plots saved to {save_path}_*.png")
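The commit message notes that bootstrap_pod_ci disables the Kriging optimizer during bootstrap resampling; the pod.py diff itself is not shown on this page. A standalone sketch of that idea with scikit-learn follows — the helper name `bootstrap_kriging_means` and the kernel choice are assumptions, but `optimizer=None` with a pre-fitted `kernel_` is the standard way to freeze GP hyperparameters:

```python
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, WhiteKernel

def bootstrap_kriging_means(X, y, X_eval, n_boot=50, seed=0):
    """Refit a GP on resampled data, reusing the kernel fitted once.

    Fitting GP hyperparameters is the expensive part; freezing them
    (optimizer=None) makes each bootstrap iteration cheap and avoids
    unstable hyperparameter estimates on small resamples.
    """
    rng = np.random.default_rng(seed)
    X2d = np.asarray(X).reshape(-1, 1)
    Xe = np.asarray(X_eval).reshape(-1, 1)

    # One full fit to learn the kernel hyperparameters
    base = GaussianProcessRegressor(kernel=RBF() + WhiteKernel(),
                                    normalize_y=True).fit(X2d, y)
    frozen = base.kernel_  # fitted hyperparameters

    preds = np.empty((n_boot, Xe.shape[0]))
    for b in range(n_boot):
        idx = rng.integers(0, len(y), len(y))  # resample with replacement
        gp_b = GaussianProcessRegressor(kernel=frozen, optimizer=None,
                                        normalize_y=True)
        gp_b.fit(X2d[idx], y[idx])
        preds[b] = gp_b.predict(Xe)
    return preds
```

Percentiles of `preds` along axis 0 would then give pointwise confidence bands, analogous to the lower_ci/upper_ci returned by bootstrap_pod_ci above.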
