Skip to content
Merged

Dev #11

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 73 additions & 10 deletions app/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -477,6 +477,24 @@ def initialize_column_selectors():
ui.update_selectize("input_cols", choices=cols, selected=default_inputs)
ui.update_selectize("outcome_col", choices=cols, selected=default_outcome)

@reactive.effect
@reactive.event(input.btn_refine)
def handle_refinement():
    """Generate refinement samples when the refine button is pressed.

    Reads the requested sample count from the ``n_new_samples`` input, asks
    the current study for that many points through ``study.refine`` and
    stores the result in ``new_samples``. Returns immediately when no study
    is available. Any exception raised along the way is shown to the user as
    an error notification instead of propagating.
    """
    active_study = current_study()
    if active_study is None:
        return

    try:
        requested = input.n_new_samples()
        # NOTE(review): relies on the study object exposing
        # refine(n_points=...) -- confirm against SimulationStudy.
        generated = active_study.refine(n_points=requested)

        new_samples.set(generated)
        ui.notification_show(
            f"Generated {requested} targeted samples.",
            type="message",
        )
    except Exception as err:
        # UI boundary: report the failure rather than crash the session.
        ui.notification_show(f"Refinement failed: {err}", type="error")

@render.ui
def selection_error_display():
"""Displays a permanent red error if selections conflict."""
Expand Down Expand Up @@ -605,6 +623,29 @@ def remediation_ui():
)


@render.ui
def download_new_samples_ui():
    """Render the download section for the refined samples.

    Returns ``None`` (nothing is rendered) while ``new_samples`` is empty.
    Once it holds a value, returns a ``div`` containing a divider, a short
    message and a download button with id ``download_new_samples``.
    """
    if new_samples() is None:
        return None

    divider = ui.hr()
    caption = ui.p(
        "Success! Download your targeted samples below:",
        class_="small",
    )
    button = ui.download_button(
        "download_new_samples",
        "Download Refined CSV",
        class_="btn-success w-100",
        icon=icon_svg("download"),
    )
    return ui.div(divider, caption, button)

# NOTE(review): the JS filename map in app/run_app.py associates this button
# id with 'refinement_samples.csv', while the name used here is
# 'remediation_samples.csv' -- confirm which one is intended.
@render.download(filename="remediation_samples.csv")
def download_new_samples():
    """Stream the refined samples as CSV text.

    Yields one chunk: the content of ``new_samples`` serialised with
    ``to_csv(index=False)``. Yields nothing when ``new_samples`` is empty.
    """
    samples = new_samples()
    if samples is None:
        return
    yield samples.to_csv(index=False)


#### Server - PoD Generation (Tab 4) ####

Expand Down Expand Up @@ -709,19 +750,25 @@ def compute_pod_analysis():
val = results["a90_95"]
a9095_str = f"{val:.3f}" if not np.isnan(val) else "Not Reached"

# 3. Format the Mean Model string based on the new architecture
mean_model = results["mean_model"]
if mean_model.model_type_ == 'Polynomial':
model_str = f"Polynomial (Degree {mean_model.model_params_})"
else:
model_str = "Kriging (Gaussian Process)"


# 3. Create Metrics Dictionary for the UI
# 4. Create Metrics Dictionary for the UI
metrics = {
"Parameter of Interest": results["poi_col"],
"Threshold": results["threshold"],
"a90/95": a9095_str,
"Model Degree": results["mean_model"].best_degree_,
"Mean Model": model_str,
"Smoothing Bandwidth": f"{results['bandwidth']:.4f}",
"Error Distribution": results["dist_info"][0].capitalize()
}
pod_metrics.set(metrics)

# 4. Prepare Data for Download
# 5. Prepare Data for Download
export_df = pd.DataFrame({
"x_defect_size": results["X_eval"],
"pod_mean": results["curves"]["pod"],
Expand All @@ -730,25 +777,32 @@ def compute_pod_analysis():
})
pod_export_data.set(export_df)

# 5. Generate Plots (Visualise draws them internally)
# 6. Generate Plots (Visualise draws them internally)
study.visualise(show=False)
plot_trigger.set(plot_trigger() + 1)

except Exception as e:
ui.notification_show(f"Analysis Failed: {str(e)}", type="error")


# --- RESULTS DISPLAY ---
# --- RESULTS DISPLAY ---
@render.ui
def pod_results_container():
"""
Renders the side-by-side plots and the metrics table.
Renders the model selection plot, side-by-side analysis plots, and the metrics table.
"""
if pod_metrics() is None:
return ui.div()

return ui.div(
# Row 1: Plots
# Row 1: Model Selection Plot (Full Width)
ui.card(
ui.card_header("Model Selection (Bias-Variance Tradeoff)"),
ui.output_plot("plot_model_selection", height="400px"),
full_screen=True,
class_="mb-3"
),
# Row 2: Signal Model and PoD Plots
ui.layout_columns(
ui.card(
ui.card_header("Signal Model Fit"),
Expand All @@ -760,9 +814,10 @@ def pod_results_container():
ui.output_plot("plot_curve"),
full_screen=True
),
col_widths=[6, 6]
col_widths=[6, 6],
class_="mb-3"
),
# Row 2: Table and Download Actions
# Row 3: Table and Download Actions
ui.layout_columns(
ui.card(
ui.card_header("Key Reliability Metrics"),
Expand All @@ -780,6 +835,14 @@ def pod_results_container():
)
)

@render.plot
def plot_model_selection():
    """Return the stored model-selection figure of the current study.

    ``plot_trigger`` is read only to register a reactive dependency, so this
    output re-renders whenever that value changes. Returns ``None`` when
    there is no study or when ``study.plots`` has no ``"model_selection"``
    entry.
    """
    plot_trigger()  # value unused; the read itself creates the dependency
    active_study = current_study()
    if not active_study:
        return None
    if "model_selection" not in active_study.plots:
        return None
    return active_study.plots["model_selection"]

@render.plot
def plot_signal():
_ = plot_trigger() # Dependency on button click
Expand Down
2 changes: 1 addition & 1 deletion app/run_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ def inject_js(window):
// Map Button IDs to Filenames
const filenameMap = {
'download_lhs': 'experimental_design.csv',
'download_new_samples_csv': 'refinement_samples.csv',
'download_new_samples': 'refinement_samples.csv',
'download_pod_results': 'pod_analysis_results.csv'
};

Expand Down
36 changes: 36 additions & 0 deletions scripts/kriging_run.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import numpy as np
import pandas as pd
from digiqual.core import SimulationStudy

# Demo script: run a PoD study on synthetic sigmoid-shaped data.

print("Generating synthetic non-linear data...")

# 1. Inputs: 150 evenly spaced flaw sizes; the seed is fixed so the noise
#    drawn below is repeatable.
np.random.seed(42)
flaw_sizes = np.linspace(0.1, 10.0, 150)

# 2. Responses: a sigmoid (flat at both ends, centred on flaw size 5) plus
#    Gaussian noise whose standard deviation grows with flaw size. The shape
#    is meant to be hard for a polynomial fit and better suited to Kriging.
true_responses = 20 / (1 + np.exp(-1.5 * (flaw_sizes - 5)))
noise_sd = 1.0 + 0.1 * flaw_sizes
noise = np.random.normal(0, noise_sd, size=len(flaw_sizes))
responses = true_responses + noise

df = pd.DataFrame({'Flaw_Size': flaw_sizes, 'Response': responses})

# 3. Set up the study and load the data.
print("Initializing SimulationStudy...")
study = SimulationStudy(input_cols=['Flaw_Size'], outcome_col='Response')
study.add_data(df)
study.diagnose()

# 4. PoD analysis. The threshold of 10.0 sits at the sigmoid's midpoint;
#    100 bootstrap iterations keeps the run short for testing.
print("\n--- Running PoD Analysis ---")
results = study.pod(poi_col='Flaw_Size', threshold=10.0, n_boot=100)

# 5. Draw the resulting plots.
print("\n--- Generating Visualizations ---")
study.visualise()
46 changes: 20 additions & 26 deletions scripts/make_fake_data.py
Original file line number Diff line number Diff line change
@@ -1,48 +1,42 @@
import pandas as pd
import numpy as np

def generate_fake_data(filename="app/initial_data.csv", n=25):
    """Write a small dataset that is built to fail the diagnostics.

    ``Length`` is drawn only from the two ranges 0-2 and 8-10, leaving a
    deliberate gap in between. ``Signal`` is a monotonic quadratic in
    ``Length`` plus gamma noise whose scale grows with ``Length`` (skewed,
    heteroskedastic noise).

    Args:
        filename: Path of the CSV file to write.
        n: Total number of rows. The rows are split between the low and the
            high ``Length`` cluster (``n // 2`` low, the remainder high), so
            the default of 25 gives 12 and 13.
    """
    # 1. Deliberate gap (0-2 and 8-10). The cluster sizes are derived from n
    #    so that every column has exactly n rows; fixed sizes of 12 and 13
    #    would only match the other columns when n == 25.
    n_low = n // 2
    n_high = n - n_low
    lengths = np.concatenate([
        np.random.uniform(0, 2, n_low),
        np.random.uniform(8, 10, n_high),
    ])
    df = pd.DataFrame({
        'Length': lengths,
        'Angle': np.random.uniform(-45, 45, n),
    })

    # 2. Monotonic physics + skewed gamma noise. The gamma scale increases
    #    with Length, which makes the noise heteroskedastic.
    base_signal = 10.0 + 1.5 * df['Length'] + 0.2 * (df['Length'] ** 2)

    # Gamma noise is always positive and right-skewed (non-normal).
    noise_scale = 0.5 + (0.8 * df['Length'])
    noise = np.random.gamma(shape=2.0, scale=noise_scale, size=n)

    df['Signal'] = base_signal + noise
    df.to_csv(filename, index=False)

    print(f"✅ Created '{filename}' (N={n}). Should fail Gap and Bootstrap.")

def updated_data(filename="app/sufficient_data.csv", n=1500):
    """Write a large dataset that is expected to pass the diagnostics.

    ``Length`` covers 0-10 uniformly and ``Angle`` covers -45 to 45.
    ``Signal`` is a quadratic in ``Length`` plus gamma noise whose scale
    grows with ``Length``; the large ``n`` is what is meant to offset that
    skewed noise.

    Args:
        filename: Path of the CSV file to write.
        n: Number of rows to generate.
    """
    # Draw Length before Angle so the random stream is consumed in the same
    # order as the column layout.
    length_values = np.random.uniform(0, 10, n)
    angle_values = np.random.uniform(-45, 45, n)
    df = pd.DataFrame({'Length': length_values, 'Angle': angle_values})

    length = df['Length']
    base_signal = 10.0 + 1.5 * length + 0.2 * (length**2)
    noise_scale = 0.5 + (0.8 * length)
    noise = np.random.gamma(shape=2.0, scale=noise_scale, size=n)

    df['Signal'] = base_signal + noise
    df.to_csv(filename, index=False)
    print(f"✅ Created '{filename}' (N={n}). Should pass all tests.")

if __name__ == "__main__":
# You can comment out the one you don't want, or run both
generate_fake_data()
updated_data()
17 changes: 13 additions & 4 deletions src/digiqual/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,7 @@ def optimise(
self.data = pd.DataFrame() # Clear old state to avoid duplication
self.add_data(final_data)

#### PoD Analysis ####
#### PoD Analysis ####
def pod(
self,
poi_col: str,
Expand Down Expand Up @@ -308,10 +308,13 @@ def pod(
X = self.clean_data[poi_col].values
y = self.clean_data[self.outcome].values

# 2. Fit Mean Model (Robust Polynomial)
# 2. Fit Mean Model (Robust Regression)
print("1. Selecting Mean Model (Cross-Validation)...")
mean_model = pod.fit_robust_mean_model(X, y)
print(f" -> Selected Polynomial Degree: {mean_model.best_degree_}")
if mean_model.model_type_ == 'Polynomial':
print(f"-> Selected Model: Polynomial (Degree {mean_model.model_params_})")
else:
print("-> Selected Model: Kriging (Gaussian Process)")

# 3. Fit Variance Model & Generate Grid
print("2. Fitting Variance Model (Kernel Smoothing)...")
Expand All @@ -335,7 +338,7 @@ def pod(
print(f"5. Running Bootstrap ({n_boot} iterations)...")
lower_ci, upper_ci = pod.bootstrap_pod_ci(
X, y, X_eval, threshold,
mean_model.best_degree_, bandwidth, (dist_name, dist_params),
mean_model.model_type_, mean_model.model_params_, bandwidth, (dist_name, dist_params),
n_boot=n_boot
)

Expand Down Expand Up @@ -397,6 +400,10 @@ def visualise(self, show: bool = True, save_path: str = None) -> None:
res["X"], res["residuals"], res["X_eval"], res["bandwidth"]
)

# 0. Model Selection Plot (NEW)
if hasattr(res["mean_model"], "cv_scores_"):
self.plots["model_selection"] = pod.plot_model_selection(res["mean_model"].cv_scores_)

# 1. Signal Model Plot
self.plots["signal_model"] = plot_signal_model(
X=res["X"],
Expand All @@ -420,6 +427,8 @@ def visualise(self, show: bool = True, save_path: str = None) -> None:

# Handle Saving
if save_path:
if "model_selection" in self.plots:
self.plots["model_selection"].savefig(f"{save_path}_model_selection.png")
self.plots["signal_model"].get_figure().savefig(f"{save_path}_signal.png")
self.plots["pod_curve"].get_figure().savefig(f"{save_path}_pod.png")
print(f"Plots saved to {save_path}_*.png")
Expand Down
Loading