diff --git a/benchmarks/profiler/webapp/__init__.py b/benchmarks/profiler/webapp/__init__.py
new file mode 100644
index 0000000000..1b4f510e5f
--- /dev/null
+++ b/benchmarks/profiler/webapp/__init__.py
@@ -0,0 +1,7 @@
+import importlib.metadata
+
+from benchmarks.profiler.webapp.main import main
+
+__version__ = importlib.metadata.version("aiconfigurator")
+# Importing any submodule executes this file, so it must not launch the server; run `python -m benchmarks.profiler.webapp.main`.
+__all__ = ["__version__", "main"]
diff --git a/benchmarks/profiler/webapp/core/__init__.py b/benchmarks/profiler/webapp/core/__init__.py
new file mode 100644
index 0000000000..dce7de9dda
--- /dev/null
+++ b/benchmarks/profiler/webapp/core/__init__.py
@@ -0,0 +1,10 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+"""
+Core functionality for the Dynamo SLA Profiler webapp.
+
+This package contains:
+- constants: Shared constants and configuration
+- profiling, orchestrator: AI Configurator performance estimation and the plot/table pipeline built on it
+"""
diff --git a/benchmarks/profiler/webapp/core/constants.py b/benchmarks/profiler/webapp/core/constants.py
new file mode 100644
index 0000000000..08557007fd
--- /dev/null
+++ b/benchmarks/profiler/webapp/core/constants.py
@@ -0,0 +1,153 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+"""
+Constants and configuration for the Dynamo SLA Profiler webapp.
+"""
+
+# Table headers per metric. static/utils.js keeps its own copy (`tableHeaders`); update both together.
+PREFILL_TABLE_HEADERS = [
+    "GPUs",
+    "TTFT (ms)",
+    "Throughput (tokens/s/GPU)",
+]
+
+DECODE_TABLE_HEADERS = [
+    "GPUs",
+    "ITL (ms)",
+    "Throughput (tokens/s/GPU)",
+]
+
+COST_TABLE_HEADERS = [
+    "TTFT (ms)",
+    "Prefill Thpt (tokens/s/GPU)",
+    "ITL (ms)",
+    "Decode Thpt (tokens/s/GPU)",
+    "Tokens/User",
+    "Cost ($)",
+]
+
+# Backend version mapping
+BACKEND_VERSIONS = {
+    "trtllm": ["1.0.0", "0.20.0", "0.19.0", "0.18.0"],
+    "vllm": ["0.10.0"],
+    "sglang": ["0.4.5"],
+}
+
+# Supported GPU systems
+GPU_SYSTEMS = [
+    "H100_SXM",
+    "H200_SXM",
+    "A100_SXM",
+    "A100_PCIE",
+]
+
+# Supported inference backends
+INFERENCE_BACKENDS = ["vllm", "sglang", "trtllm"]
+
+# GPU count options
+MIN_GPU_OPTIONS = [1, 2, 4, 8]
+MAX_GPU_OPTIONS = [1, 2, 4, 8, 16]
+
+# Default decode interpolation granularity
+DEFAULT_DECODE_INTERPOLATION_GRANULARITY = 6
+
+# CSS styles for custom table rendering
+TABLE_CSS = """
+<style>
+    .dynamo-table-wrapper {
+        overflow-x: auto;
+        margin-top: 0.5rem;
+    }
+    .dynamo-table {
+        width: 100%;
+        border-collapse: collapse;
+        font-size: 0.95rem;
+    }
+    .dynamo-table thead {
+        background: rgba(255, 255, 255, 0.05);
+        text-transform: uppercase;
+        letter-spacing: 0.02em;
+    }
+    .dynamo-table th,
+    .dynamo-table td {
+        padding: 0.55rem 0.75rem;
+        text-align: left;
+        border-bottom: 1px solid rgba(255, 255, 255, 0.08);
+    }
+    .dynamo-table tbody tr:hover {
+        background: rgba(255, 255, 255, 0.08);
+    }
+    .dynamo-table-empty {
+        text-align: center;
+        padding: 0.85rem 0;
+        opacity: 0.7;
+    }
+</style>
+"""
+
+# Default configuration YAML placeholder
+DEFAULT_CONFIG_YAML = """apiVersion: nvidia.com/v1alpha1
+kind: DynamoGraphDeployment
+metadata:
+  name: vllm-disagg
+spec:
+  services:
+    Frontend:
+      dynamoNamespace: vllm-disagg
+      componentType: frontend
+      replicas: 1
+      extraPodSpec:
+        mainContainer:
+          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag"""
+
+# Plot interaction instructions
+PLOT_INTERACTION_INSTRUCTIONS = """
+**How to interact with plots:**
+- **Hover** over points to see detailed information
+- **Click** points to select them (click again to deselect)
+- **Multiple selection**: Click multiple points with shift key or select tools from the top right corner to compare specific configurations
+- The table below each plot will filter to show only selected points, or all points if none are selected
+"""
+
+# Tab descriptions
+PREFILL_TAB_DESCRIPTION = """
+**Prefill Performance**: Interactive plot showing the relationship between Time to First Token (TTFT)
+and throughput per GPU for different GPU counts. **Click points to select/deselect** (multi-select enabled).
+Table shows selected points, or all points if none selected.
+"""
+
+DECODE_TAB_DESCRIPTION = """
+**Decode Performance**: Interactive plot showing the relationship between Inter Token Latency (ITL)
+and throughput per GPU for different GPU counts. **Click points to select/deselect** (multi-select enabled).
+Table shows selected points, or all points if none selected.
+"""
+
+COST_TAB_DESCRIPTION = """
+**Cost Analysis**: Interactive plot showing the cost per 1000 requests under different SLA configurations.
+Lower curves represent better cost efficiency for the same throughput. **Click points to select/deselect** (multi-select enabled).
+Table shows selected points, or all points if none selected.
+"""
+
+# Plotly color palette
+PLOTLY_COLORS = ["#1f77b4", "#ff7f0e", "#2ca02c", "#d62728", "#9467bd", "#8c564b"]
+
+# Shared Plotly layout settings for the dark theme (transparent backgrounds, light modebar, boxed legend)
+PLOTLY_DARK_THEME = {
+    "template": "plotly_dark",
+    "plot_bgcolor": "rgba(0, 0, 0, 0)",
+    "paper_bgcolor": "rgba(0, 0, 0, 0)",
+    "modebar": {
+        "bgcolor": "rgba(0, 0, 0, 0)",
+        "color": "rgba(255, 255, 255, 0.5)",
+        "activecolor": "rgba(255, 255, 255, 0.9)",
+    },
+    "legend": {
+        "yanchor": "top",
+        "y": 0.99,
+        "xanchor": "left",
+        "x": 0.01,
+        "bgcolor": "rgba(0, 0, 0, 0.5)",
+        "font": {"color": "white"},
+    },
+}
diff --git a/benchmarks/profiler/webapp/core/orchestrator.py b/benchmarks/profiler/webapp/core/orchestrator.py
new file mode 100644
index 0000000000..c857cf8c07
--- /dev/null
+++ b/benchmarks/profiler/webapp/core/orchestrator.py
@@ -0,0 +1,185 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+"""
+Orchestration logic for generating performance plots.
+
+This module contains the main pipeline that coordinates profiling,
+plot generation, and table building.
+"""
+
+from benchmarks.profiler.webapp.core.profiling import (
+    format_status_message,
+    generate_gpu_configurations,
+    initialize_ai_configurator,
+    profile_decode_performance,
+    profile_prefill_performance,
+    validate_inputs,
+)
+from benchmarks.profiler.webapp.ui.plots import (
+    plot_cost_sla_interactive,
+    plot_decode_performance_interactive,
+    plot_prefill_performance_interactive,
+)
+from benchmarks.profiler.webapp.ui.tables import build_all_tables, get_empty_tables
+
+
+def generate_plots(
+    aic_model_name: str,
+    backend: str,
+    config_yaml: str,
+    use_aic: bool,
+    aic_backend: str,
+    aic_backend_version: str,
+    aic_system: str,
+    min_num_gpus_per_engine: int,
+    max_num_gpus_per_engine: int,
+    num_gpus_per_node: int,
+    gpu_cost_per_hour: float,
+    isl: int,
+    osl: int,
+    max_context_length: int,
+    ttft: float,
+    itl: float,
+):
+    """
+    Generate performance plots using AI Configurator estimation.
+
+    This function profiles LLM inference performance by:
+    1. Estimating prefill performance (TTFT) across different GPU counts
+    2. Estimating decode performance (ITL) at various concurrency levels
+    3. Computing cost-vs-SLA tradeoffs based on GPU pricing
+
+    Args:
+        aic_model_name: Model name for AI Configurator (e.g., "QWEN3_32B")
+        backend: Inference backend (vllm, sglang, trtllm); accepted but not read by this function
+        config_yaml: YAML configuration string from UI; accepted but not read (reserved for future use)
+        use_aic: Whether to use AI Configurator (must be True for webapp)
+        aic_backend: Backend for AI Configurator estimation
+        aic_backend_version: Version of the backend
+        aic_system: GPU system (e.g., "H200_SXM")
+        min_num_gpus_per_engine: Minimum TP size to profile
+        max_num_gpus_per_engine: Maximum TP size to profile
+        num_gpus_per_node: GPUs per node; accepted but not read by this function
+        gpu_cost_per_hour: Cost per GPU per hour in dollars
+        isl: Input sequence length
+        osl: Output sequence length
+        max_context_length: Maximum context length (currently unused)
+        ttft: Target TTFT in milliseconds (passed to the prefill plot)
+        itl: Target ITL in milliseconds (passed to the decode plot)
+
+    Returns:
+        Tuple of (prefill_plot, decode_plot, cost_plot, status_message, prefill_table_html, decode_table_html,
+        cost_table_html). On any failure the plots are None and the tables are the get_empty_tables() placeholders.
+    """
+    empty_prefill_html, empty_decode_html, empty_cost_html = get_empty_tables()
+
+    try:
+        # Validate inputs
+        is_valid, error_msg = validate_inputs(
+            use_aic, aic_model_name, aic_system, aic_backend_version
+        )
+        if not is_valid:
+            return (
+                None,
+                None,
+                None,
+                error_msg,
+                empty_prefill_html,
+                empty_decode_html,
+                empty_cost_html,
+            )
+
+        # Initialize AI Configurator
+        ai_configurator = initialize_ai_configurator(
+            aic_model_name, aic_system, aic_backend, aic_backend_version
+        )
+
+        # Generate GPU configurations to profile
+        profile_num_gpus = generate_gpu_configurations(
+            min_num_gpus_per_engine, max_num_gpus_per_engine
+        )
+
+        if not profile_num_gpus:
+            return (
+                None,
+                None,
+                None,
+                "❌ No valid GPU configurations to profile",
+                empty_prefill_html,
+                empty_decode_html,
+                empty_cost_html,
+            )
+
+        # Profile prefill performance; element 0 of the result is the list of profiled GPU counts
+        prefill_results = profile_prefill_performance(
+            ai_configurator, profile_num_gpus, isl
+        )
+
+        if not prefill_results[0]:
+            return (
+                None,
+                None,
+                None,
+                "❌ Failed to generate prefill results",
+                empty_prefill_html,
+                empty_decode_html,
+                empty_cost_html,
+            )
+
+        # Profile decode performance
+        decode_results = profile_decode_performance(
+            ai_configurator, profile_num_gpus, isl, osl
+        )
+
+        if not decode_results:
+            return (
+                None,
+                None,
+                None,
+                "❌ Failed to generate decode results",
+                empty_prefill_html,
+                empty_decode_html,
+                empty_cost_html,
+            )
+
+        # Generate interactive plots
+        prefill_plot = plot_prefill_performance_interactive(prefill_results, ttft)
+        decode_plot = plot_decode_performance_interactive(decode_results, itl)
+        cost_plot = plot_cost_sla_interactive(
+            isl, osl, prefill_results, decode_results, gpu_cost_per_hour
+        )
+
+        # Generate success status message
+        status_msg = format_status_message(
+            profile_num_gpus, prefill_results, gpu_cost_per_hour
+        )
+
+        # Build all tables
+        prefill_table_html, decode_table_html, cost_table_html = build_all_tables(
+            prefill_results, decode_results, isl, osl, gpu_cost_per_hour
+        )
+
+        return (
+            prefill_plot,
+            decode_plot,
+            cost_plot,
+            status_msg,
+            prefill_table_html,
+            decode_table_html,
+            cost_table_html,
+        )
+
+    except Exception as e:
+        import traceback
+        # Report the failure, traceback included, through the status message instead of raising
+        error_msg = f"❌ Error generating plots:\n{str(e)}\n\n{traceback.format_exc()}"
+        return (
+            None,
+            None,
+            None,
+            error_msg,
+            empty_prefill_html,
+            empty_decode_html,
+            empty_cost_html,
+        )
diff --git a/benchmarks/profiler/webapp/core/profiling.py b/benchmarks/profiler/webapp/core/profiling.py
new file mode 100644
index 0000000000..c78aa242bf
--- /dev/null
+++ b/benchmarks/profiler/webapp/core/profiling.py
@@ -0,0 +1,201 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+"""
+Performance profiling logic for the Dynamo SLA Profiler webapp.
+
+This module handles the actual performance estimation using AI Configurator,
+including prefill and decode performance profiling.
+"""
+
+import math
+
+from benchmarks.profiler.utils.estimate_perf import AIConfiguratorPerfEstimator
+from benchmarks.profiler.utils.profile_decode import get_num_request_range
+from benchmarks.profiler.webapp.core.constants import (
+    DEFAULT_DECODE_INTERPOLATION_GRANULARITY,
+)
+
+
+def validate_inputs(use_aic, aic_model_name, aic_system, aic_backend_version):
+    """
+    Check that the inputs required for AI Configurator mode are present.
+
+    Args:
+        use_aic: Whether AI Configurator is enabled; must be truthy
+        aic_model_name: Model name for AI Configurator
+        aic_system: GPU system name
+        aic_backend_version: Backend version
+
+    Returns:
+        (True, None) when the inputs are usable, otherwise (False, error_message)
+    """
+    if not use_aic:
+        return False, "❌ Web UI requires AI Configurator mode"
+
+    required_fields = (aic_model_name, aic_system, aic_backend_version)
+    if not all(required_fields):
+        return False, "❌ Missing required AI Configurator parameters"
+    return True, None
+
+
+def initialize_ai_configurator(
+    aic_model_name, aic_system, aic_backend, aic_backend_version
+):
+    """
+    Build the AI Configurator performance estimator for one model/system pair.
+
+    Args:
+        aic_model_name: Model name for AI Configurator
+        aic_system: GPU system name (e.g., "H200_SXM"); lower-cased before use
+        aic_backend: Backend used for the estimation
+        aic_backend_version: Version of that backend
+
+    Returns:
+        A new AIConfiguratorPerfEstimator instance
+    """
+    # The estimator is constructed with the lower-case form of the system name.
+    system_key = aic_system.lower()
+    estimator = AIConfiguratorPerfEstimator(
+        aic_model_name, system_key, aic_backend, aic_backend_version
+    )
+    return estimator
+
+
+def generate_gpu_configurations(min_num_gpus, max_num_gpus):
+    """
+    Generate GPU counts to profile (powers of 2 for dense models).
+
+    Args:
+        min_num_gpus: Minimum number of GPUs (inclusive)
+        max_num_gpus: Maximum number of GPUs (inclusive); values below 1 yield []
+
+    Returns:
+        Ascending list of the powers of two within [min_num_gpus, max_num_gpus]
+    """
+    # math.log2 raises ValueError for values <= 0, so handle an empty range up front.
+    if max_num_gpus < 1:
+        return []
+
+    exponents = range(int(math.log2(max_num_gpus)) + 1)
+    return [2**i for i in exponents if min_num_gpus <= 2**i <= max_num_gpus]
+
+
+def profile_prefill_performance(ai_configurator, profile_num_gpus, isl):
+    """
+    Estimate prefill latency and per-GPU throughput for each GPU count.
+
+    Args:
+        ai_configurator: AIConfiguratorPerfEstimator instance
+        profile_num_gpus: GPU counts to evaluate, each used as the TP size
+        isl: Input sequence length
+
+    Returns:
+        Tuple of three parallel lists with one entry per GPU count:
+        (num_gpus_list, ttft_list, thpt_per_gpu_list)
+    """
+    gpu_counts = []
+    ttfts = []
+    thpts = []
+
+    for tp_size in profile_num_gpus:
+        # Prefill estimate from AI Configurator at this TP size
+        estimate = ai_configurator.estimate_prefill_perf(
+            isl, tp_size=tp_size
+        )
+        latency_ms = estimate["context_latency"]
+        # tokens/second/GPU: isl tokens per latency_ms, scaled ms -> s, split across the GPUs
+        per_gpu_thpt = isl / latency_ms * 1000 / tp_size
+
+        gpu_counts.append(tp_size)
+        ttfts.append(latency_ms)
+        thpts.append(per_gpu_thpt)
+
+    return (gpu_counts, ttfts, thpts)
+
+
+def profile_decode_performance(
+    ai_configurator,
+    profile_num_gpus,
+    isl,
+    osl,
+    decode_interpolation_granularity=DEFAULT_DECODE_INTERPOLATION_GRANULARITY,
+):
+    """
+    Sweep decode performance over concurrency levels for each GPU count.
+
+    Args:
+        ai_configurator: AIConfiguratorPerfEstimator instance
+        profile_num_gpus: GPU counts to evaluate, each used as the TP size
+        isl: Input sequence length
+        osl: Output sequence length
+        decode_interpolation_granularity: Granularity passed to get_num_request_range
+
+    Returns:
+        List of tuples (num_gpus, itl_list, thpt_per_gpu_list); a GPU count whose
+        request sweep is empty contributes no tuple
+    """
+    # For dense models (not MoE), attention_dp_size = 1
+    attention_dp_size = 1
+    results = []
+
+    for tp_size in profile_num_gpus:
+        # The maximum batch size for this configuration feeds the request sweep
+        max_batch = ai_configurator.get_max_batch_size(
+            isl,
+            osl,
+            tp_size=tp_size,
+        )
+        request_counts = get_num_request_range(
+            attention_dp_size,
+            max_batch,
+            decode_interpolation_granularity,
+        )
+
+        itl_points = []
+        thpt_points = []
+        for concurrency in request_counts:
+            # Decode-mode estimate at this number of concurrent requests
+            estimate = ai_configurator.estimate_perf(
+                isl,
+                osl,
+                concurrency,
+                mode="decode",
+                tp_size=tp_size,
+            )
+
+            inter_token_latency = estimate["tpot"]
+            throughput_per_gpu = estimate["tokens/s/gpu"]
+            itl_points.append(inter_token_latency)
+            thpt_points.append(throughput_per_gpu)
+
+        # An empty sweep produces no curve, so that GPU count is left out
+        if not itl_points:
+            continue
+        results.append((tp_size, itl_points, thpt_points))
+
+    return results
+
+
+def format_status_message(profile_num_gpus, prefill_results, gpu_cost_per_hour):
+    """
+    Build the multi-line success status message with a profiling summary.
+
+    Args:
+        profile_num_gpus: List of GPU counts profiled
+        prefill_results: Tuple (num_gpus_list, ttft_list, thpt_per_gpu_list)
+        gpu_cost_per_hour: Cost per GPU per hour
+
+    Returns:
+        Status text naming the lowest-TTFT configuration (the first one on ties)
+    """
+    gpu_counts, ttfts, _ = prefill_results
+    best_ttft = min(ttfts)
+    best_gpus = gpu_counts[ttfts.index(best_ttft)]
+    lines = [
+        "✅ Plots generated successfully!",
+        f"📊 Profiled {len(profile_num_gpus)} GPU configurations: {profile_num_gpus}",
+        f"⚡ Best prefill: {best_ttft:.1f}ms TTFT at {best_gpus} GPUs",
+        f"💰 GPU Cost: ${gpu_cost_per_hour:.2f}/hour",
+    ]
+    return "\n".join(lines)
diff --git a/benchmarks/profiler/webapp/main.py b/benchmarks/profiler/webapp/main.py
new file mode 100644
index 0000000000..ce9e563cd3
--- /dev/null
+++ b/benchmarks/profiler/webapp/main.py
@@ -0,0 +1,26 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+"""
+Main entry point for the Dynamo SLA Profiler web application.
+
+This webapp provides an interactive interface for profiling LLM inference performance
+using AI Configurator estimates.
+"""
+
+from benchmarks.profiler.webapp.ui.app import build_interface
+
+
+def main():
+    """Launch the Dynamo SLA Profiler webapp."""
+    from pathlib import Path  # function-scope import keeps this change self-contained
+
+    # Resolve the script next to this module so launching works from any working directory
+    js_path = Path(__file__).resolve().parent / "static" / "utils.js"
+    custom_js = f"()=>{{{js_path.read_text(encoding='utf-8')}}}"
+    demo = build_interface(custom_js)
+    demo.launch(server_name="0.0.0.0")  # listens on all network interfaces
+
+
+if __name__ == "__main__":
+    main()
diff --git a/benchmarks/profiler/webapp/static/utils.js b/benchmarks/profiler/webapp/static/utils.js
new file mode 100644
index 0000000000..4390769ffa
--- /dev/null
+++ b/benchmarks/profiler/webapp/static/utils.js
@@ -0,0 +1,308 @@
+// Storage for selected points (multi-selection)
+const selectedPointKeys = {
+    prefill: [],
+    decode: [],
+    cost: []
+};
+
+// Storage for all data points
+const allDataPoints = {
+    prefill: [],
+    decode: [],
+    cost: []
+};
+
+// Lookup from point key to row values
+const pointDataLookup = {
+    prefill: {},
+    decode: {},
+    cost: {}
+};
+// Column headers per plot type; mirrors the *_TABLE_HEADERS lists in core/constants.py.
+const tableHeaders = {
+    prefill: ["GPUs", "TTFT (ms)", "Throughput (tokens/s/GPU)"],
+    decode: ["GPUs", "ITL (ms)", "Throughput (tokens/s/GPU)"],
+    cost: [
+        "TTFT (ms)",
+        "Prefill Thpt (tokens/s/GPU)",
+        "ITL (ms)",
+        "Decode Thpt (tokens/s/GPU)",
+        "Tokens/User",
+        "Cost ($)"
+    ]
+};
+
+// Identifier for a trace: its `uid` when present and truthy, else `trace-<fallbackIndex>`.
+function getTraceUid(trace, fallbackIndex) {
+    const uid = trace ? trace.uid : undefined;
+
+    return uid || `trace-${fallbackIndex}`;
+}
+
+function makePointKey(traceUid, pointIndex) {
+    return `${traceUid}:${pointIndex}`;
+}
+
+// Rows to render for a plot: the selected points when there are any, otherwise every stored point.
+function getDisplayRows(plotType) {
+    const selected = selectedPointKeys[plotType];
+    const nothingSelected = !selected || selected.length === 0;
+    if (nothingSelected) {
+        return allDataPoints[plotType].map((row) => row.values);
+    }
+
+    const byKey = pointDataLookup[plotType] || {};
+    return selected.map((key) => byKey[key]).filter(Boolean).map((row) => row.values);
+}
+
+function computeSelectedKeys(plotDiv, lookup) {
+    const keys = [];
+    if (!plotDiv || !plotDiv.data) {
+        return keys;
+    }
+    // Collect a key for every index listed in each trace's `selectedpoints` array.
+    plotDiv.data.forEach((trace, traceIdx) => {
+        if (!trace) {
+            return;
+        }
+
+        const traceUid = getTraceUid(trace, traceIdx);
+        const selectedPoints = trace.selectedpoints;
+
+        if (!Array.isArray(selectedPoints) || selectedPoints.length === 0) {
+            return;
+        }
+        // With a lookup, keep only keys it contains; without one, keep every key.
+        selectedPoints.forEach((pointIndex) => {
+            const key = makePointKey(traceUid, pointIndex);
+            if (!lookup || lookup[key]) {
+                keys.push(key);
+            }
+        });
+    });
+
+    return keys;
+}
+
+// Coerce one row value into a fresh array of cell values.
+function normalizeRow(row) {
+    if (row === null || row === undefined) {
+        return [];
+    }
+    if (Array.isArray(row)) {
+        return row.slice();
+    }
+    if (typeof row !== "object") {
+        // Anything that is not an object becomes a single-cell row.
+        return [row];
+    }
+    const iterable = typeof row[Symbol.iterator] === "function";
+    return iterable ? Array.from(row) : Object.values(row);
+}
+
+// Cell text: "" for null/undefined, integers as-is, other finite numbers with
+// three decimals, and everything else via template-string conversion.
+function formatCell(value) {
+    if (value === null || value === undefined) {
+        return "";
+    }
+    const isFiniteNumber = typeof value === "number" && Number.isFinite(value);
+    if (!isFiniteNumber) {
+        return `${value}`;
+    }
+    return Number.isInteger(value) ? value.toString() : value.toFixed(3);
+}
+
+function renderTableHTML(headers, rows) {
+    const safeHeaders = headers || [];
+    const headerCells = safeHeaders.map((header) => `<th>${header}</th>`).join("");
+    // NOTE(review): headers and cell values are interpolated into the markup without HTML escaping.
+    let bodyHtml = "";
+    if (!rows || rows.length === 0) {
+        bodyHtml = `<tr><td class="dynamo-table-empty" colspan="${safeHeaders.length || 1}">No data selected yet. Click points on the plot to populate this table.</td></tr>`;
+    } else {
+        bodyHtml = rows
+            .map((row) => {
+                const normalized = normalizeRow(row);
+                const length = safeHeaders.length > 0 ? safeHeaders.length : normalized.length;
+                const cells = Array.from({ length }, (_, idx) => {
+                    const value = normalized[idx];
+                    return `<td>${formatCell(value)}</td>`;
+                });
+                return `<tr>${cells.join("")}</tr>`;
+            })
+            .join("");
+    }
+    // The wrapper/table class names match the rules defined in TABLE_CSS (core/constants.py).
+    return `
+        <div class="dynamo-table-wrapper">
+            <table class="dynamo-table">
+                <thead><tr>${headerCells}</tr></thead>
+                <tbody>${bodyHtml}</tbody>
+            </table>
+        </div>
+    `;
+}
+
+function updateDataTable(tableId, data, plotType) {
+    const container = document.getElementById(tableId);
+    if (!container) {
+        console.log(`Table container ${tableId} not found`);
+        return;
+    }
+    // Replace the container's contents with a freshly rendered table for this plot type.
+    const headers = tableHeaders[plotType] || [];
+    container.innerHTML = renderTableHTML(headers, data);
+    console.log(`Updated table ${tableId} with ${data ? data.length : 0} rows`);
+}
+
+function resizePlotlyGraphs() {
+    const graphs = document.querySelectorAll('.js-plotly-plot');
+    console.log(`Found ${graphs.length} Plotly graphs`);
+    Array.from(graphs).forEach((graph, position) => {
+        if (window.Plotly && graph) {
+            window.Plotly.relayout(graph, {autosize: true}); // relayout with autosize on
+            console.log(`Resized plot ${position}`);
+        }
+    });
+}
+
+// Wire one plot to its table: cache the plot's customdata rows and re-render the
+// table whenever the selection or the plotted data changes.
+function setupPlotClickHandler(plotId, tableId, plotType) {
+    const attemptSetup = () => {
+        const plotContainer = document.querySelector(`#${plotId}`);
+        if (!plotContainer) {
+            console.log(`Plot ${plotId} not found, retrying...`);
+            setTimeout(attemptSetup, 500);
+            return;
+        }
+
+        const plotDiv = plotContainer.querySelector('.js-plotly-plot');
+        // Every registration below needs plotDiv.on, so retry until it exists
+        // instead of throwing on the first unguarded call.
+        if (!plotDiv || typeof plotDiv.on !== 'function') {
+            console.log(`Plotly div not found in ${plotId}, retrying...`);
+            setTimeout(attemptSetup, 500);
+            return;
+        }
+
+        console.log(`Setting up handlers for ${plotId}`);
+
+        const headers = tableHeaders[plotType] || [];
+
+        const syncSelection = (source) => {
+            const lookup = pointDataLookup[plotType] || {};
+            const keys = computeSelectedKeys(plotDiv, lookup);
+            selectedPointKeys[plotType] = keys;
+            updateDataTable(tableId, getDisplayRows(plotType), plotType);
+            console.log(`Selection synced for ${plotType} (${source || 'update'}): ${keys.length} point(s)`);
+        };
+
+        const refreshAllDataPoints = () => {
+            if (!plotDiv || !plotDiv.data) {
+                return;
+            }
+
+            const rows = [];
+            const lookup = {};
+            plotDiv.data.forEach((trace, traceIdx) => {
+                if (!trace || !trace.customdata) {
+                    return;
+                }
+
+                const traceUid = getTraceUid(trace, traceIdx);
+
+                trace.customdata.forEach((item, pointIndex) => {
+                    const normalized = normalizeRow(item);
+                    if (normalized.length === 0) {
+                        return;
+                    }
+
+                    const alignedRow = headers.length
+                        ? headers.map((_, idx) => normalized[idx])
+                        : normalized;
+
+                    const key = makePointKey(traceUid, pointIndex);
+                    const rowObj = { key, values: alignedRow };
+                    rows.push(rowObj);
+                    lookup[key] = rowObj;
+                });
+            });
+            // Skip the re-sync when the rows are unchanged since the last refresh.
+            const newHash = JSON.stringify(rows.map((row) => [row.key, row.values]));
+            if (plotDiv.__dynamo_data_hash !== newHash) {
+                plotDiv.__dynamo_data_hash = newHash;
+                allDataPoints[plotType] = rows;
+                pointDataLookup[plotType] = lookup;
+                syncSelection('data-refresh');
+                console.log(`Stored ${rows.length} data points for ${plotType}`);
+            }
+        };
+
+        refreshAllDataPoints();
+
+        plotDiv.on('plotly_afterplot', refreshAllDataPoints);
+        plotDiv.on('plotly_restyle', refreshAllDataPoints);
+        plotDiv.on('plotly_relayout', refreshAllDataPoints);
+
+        plotDiv.on('plotly_click', function(data) {
+            console.log(`Click detected on ${plotId}`, data);
+            if (data.points && data.points.length > 0) {
+                setTimeout(() => syncSelection('click'), 0);
+            }
+        });
+
+        plotDiv.on('plotly_selected', function(eventData) {
+            if (!eventData || !eventData.points) {
+                return;
+            }
+
+            syncSelection('selection-tool');
+        });
+
+        plotDiv.on('plotly_deselect', function() {
+            syncSelection('deselect');
+        });
+
+        console.log(`Handlers configured for ${plotId}`);
+    };
+
+    setTimeout(attemptSetup, 500);
+}
+
+// Fixed 1 s delay before wiring listeners and observers; nothing here waits on an actual DOM-ready signal.
+setTimeout(() => {
+    // Find all tab buttons and add click listeners
+    const tabButtons = document.querySelectorAll('button[role="tab"]');
+    tabButtons.forEach(button => {
+        button.addEventListener('click', () => {
+            setTimeout(resizePlotlyGraphs, 150);
+        });
+    });
+
+    // Use MutationObserver to detect tab visibility changes
+    const observer = new MutationObserver(() => {
+        resizePlotlyGraphs();
+    });
+
+    // Observe changes to elements with tab content
+    const tabPanels = document.querySelectorAll('[role="tabpanel"]');
+    tabPanels.forEach(panel => {
+        observer.observe(panel, {
+            attributes: true,
+            attributeFilter: ['style', 'class', 'hidden']
+        });
+    });
+
+    // Initial resize
+    resizePlotlyGraphs();
+
+    // Setup click handlers for all plots
+    setupPlotClickHandler('prefill_plot', 'prefill_table', 'prefill');
+    setupPlotClickHandler('decode_plot', 'decode_table', 'decode');
+    setupPlotClickHandler('cost_plot', 'cost_table', 'cost');
+}, 1000);
+
+// Also resize on window resize
+window.addEventListener('resize', resizePlotlyGraphs);
\ No newline at end of file
diff --git a/benchmarks/profiler/webapp/ui/__init__.py b/benchmarks/profiler/webapp/ui/__init__.py
new file mode 100644
index 0000000000..02040289f5
--- /dev/null
+++ b/benchmarks/profiler/webapp/ui/__init__.py
@@ -0,0 +1,13 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+"""
+UI components for the Dynamo SLA Profiler webapp.
+
+This package contains:
+- settings: Model, hardware, and SLA configuration UI components
+- results: Results tabs with plots and tables
+- handlers: Event handlers for UI interactions
+- plots, tables: Interactive Plotly plotting functions; table building and data preparation utilities
+- app: Assembles the complete Gradio interface from the components above
+"""
diff --git a/benchmarks/profiler/webapp/ui/app.py b/benchmarks/profiler/webapp/ui/app.py
new file mode 100644
index 0000000000..b76db0bd20
--- /dev/null
+++ b/benchmarks/profiler/webapp/ui/app.py
@@ -0,0 +1,99 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+"""
+Gradio application interface builder for the Dynamo SLA Profiler.
+
+This module builds the complete Gradio interface by assembling
+all UI components and setting up event handlers.
+"""
+
+import gradio as gr
+
+from benchmarks.profiler.webapp.core.constants import (
+    PLOT_INTERACTION_INSTRUCTIONS,
+    TABLE_CSS,
+)
+from benchmarks.profiler.webapp.core.orchestrator import generate_plots
+from benchmarks.profiler.webapp.ui.handlers import setup_event_handlers
+from benchmarks.profiler.webapp.ui.results import create_results_tabs
+from benchmarks.profiler.webapp.ui.settings import (
+    create_hardware_settings,
+    create_model_settings,
+    create_sla_settings,
+)
+from benchmarks.profiler.webapp.ui.tables import get_empty_tables
+
+
+def build_interface(custom_js: str = None) -> gr.Blocks:
+    """
+    Assemble the full Gradio Blocks layout for the SLA Profiler.
+
+    The page is a header followed by a single row with two columns: the
+    settings sections, the generate button and a status box on the left,
+    and the results tabs on the right. Every created component is collected
+    into one dictionary, which is then passed to ``setup_event_handlers``
+    together with ``generate_plots``.
+
+    Args:
+        custom_js: JavaScript forwarded to ``gr.Blocks(js=...)``; may be None
+
+    Returns:
+        The constructed ``gr.Blocks`` instance
+    """
+    with gr.Blocks(title="Dynamo SLA Profiler", js=custom_js) as demo:
+        # Page header and table styling
+        gr.Markdown("# Dynamo SLA Profiler")
+        gr.Markdown(
+            "Generate performance plots using AI Configurator to estimate profiling results. "
+            "Configure the parameters below and click 'Generate Plots' to see the results."
+        )
+        gr.HTML(TABLE_CSS)
+
+        # Placeholder tables (prefill, decode, cost) shown before any results exist
+        empty_tables = get_empty_tables()
+
+        # Registry of every component the event handlers need
+        components = {}
+
+        with gr.Row():
+            # Left column: settings sections followed by the action controls
+            with gr.Column(scale=1):
+                settings_sections = (
+                    ("### Dynamo Settings", create_model_settings),
+                    ("### Hardware Settings", create_hardware_settings),
+                    ("### SLA Settings", create_sla_settings),
+                )
+                for heading, create_section in settings_sections:
+                    gr.Markdown(heading)
+                    components.update(create_section())
+
+                generate_btn = gr.Button(
+                    "Generate Performance Plots", variant="primary", size="lg"
+                )
+                status_box = gr.Textbox(
+                    label="Status",
+                    value="Ready to generate plots",
+                    interactive=False,
+                    show_label=False,
+                    lines=5,
+                )
+                components["generate_btn"] = generate_btn
+                components["status"] = status_box
+
+            # Right column: plots and tables
+            with gr.Column(min_width=700):
+                gr.Markdown("### Performance Results")
+                gr.Markdown(PLOT_INTERACTION_INSTRUCTIONS)
+                components.update(create_results_tabs(*empty_tables))
+
+        # The handlers also need the Blocks object itself (for the load event)
+        components["demo"] = demo
+
+        # Wire up all interactions while still inside the Blocks context
+        setup_event_handlers(components, generate_plots)
+
+    return demo
diff --git a/benchmarks/profiler/webapp/ui/handlers.py b/benchmarks/profiler/webapp/ui/handlers.py
new file mode 100644
index 0000000000..26b221fd35
--- /dev/null
+++ b/benchmarks/profiler/webapp/ui/handlers.py
@@ -0,0 +1,87 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+"""
+Event handlers for UI interactions in the Dynamo SLA Profiler webapp.
+
+This module sets up all event handlers for buttons, dropdowns, and other interactive elements.
+"""
+
+import gradio as gr
+
+from benchmarks.profiler.webapp.core.constants import BACKEND_VERSIONS
+
+
+def setup_event_handlers(components, generate_plots_fn):
+    """
+    Register all Gradio event callbacks on the already-built components.
+
+    Four events are wired: the generate button click and the page load both
+    run ``generate_plots_fn``; the "use AIC" checkbox toggles visibility of
+    the two AI Configurator dropdowns; and changing the AI Configurator
+    backend refreshes the version dropdown.
+
+    Args:
+        components: Dictionary mapping component names to Gradio components
+        generate_plots_fn: Callback invoked with the input components' values
+
+    Returns:
+        None
+    """
+    # Order matters: it is the positional argument order of generate_plots_fn
+    input_keys = (
+        "aic_model_name",
+        "backend",
+        "config_yaml",
+        "use_aic",
+        "aic_backend",
+        "aic_backend_version",
+        "aic_system",
+        "min_num_gpus_per_engine",
+        "max_num_gpus_per_engine",
+        "num_gpus_per_node",
+        "gpu_cost_per_hour",
+        "isl",
+        "osl",
+        "max_context_length",
+        "ttft",
+        "itl",
+    )
+    # Order matters: it is the order of the values generate_plots_fn returns
+    output_keys = (
+        "prefill_plot",
+        "decode_plot",
+        "cost_plot",
+        "status",
+        "prefill_table",
+        "decode_table",
+        "cost_table",
+    )
+    inputs = [components[key] for key in input_keys]
+    outputs = [components[key] for key in output_keys]
+
+    # The same callback wiring is used for the button and for page load
+    plot_event = {"fn": generate_plots_fn, "inputs": inputs, "outputs": outputs}
+    components["generate_btn"].click(**plot_event)
+    components["demo"].load(**plot_event)
+
+    def toggle_aic_fields(enabled):
+        # One visibility update per output component
+        return gr.update(visible=enabled), gr.update(visible=enabled)
+
+    components["use_aic"].change(
+        fn=toggle_aic_fields,
+        inputs=[components["use_aic"]],
+        outputs=[components["aic_backend"], components["aic_backend_version"]],
+    )
+
+    def update_backend_versions(backend):
+        # Unknown backends fall back to a single default version
+        available = BACKEND_VERSIONS.get(backend, ["1.0.0"])
+        return gr.update(choices=available, value=available[0])
+
+    components["aic_backend"].change(
+        fn=update_backend_versions,
+        inputs=[components["aic_backend"]],
+        outputs=[components["aic_backend_version"]],
+    )
diff --git a/benchmarks/profiler/webapp/ui/plots.py b/benchmarks/profiler/webapp/ui/plots.py
new file mode 100644
index 0000000000..29fa1694c4
--- /dev/null
+++ b/benchmarks/profiler/webapp/ui/plots.py
@@ -0,0 +1,293 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+"""
+Interactive plotting functions for Gradio webapp using Plotly.
+
+This module provides interactive versions of the profiler plots using Plotly,
+which integrates seamlessly with Gradio's gr.Plot component.
+"""
+
+import numpy as np
+import plotly.graph_objects as go
+
+from benchmarks.profiler.utils.parato import compute_parato
+from benchmarks.profiler.webapp.core.constants import PLOTLY_COLORS, PLOTLY_DARK_THEME
+
+
+def _configure_dark_theme(fig, title, xaxis_title, yaxis_title):
+    """
+    Set titles, interaction modes, theme values and grid lines on a figure.
+
+    Args:
+        fig: Plotly Figure object, updated in place
+        title: Plot title (centered; long titles get a smaller font)
+        xaxis_title: X-axis title
+        yaxis_title: Y-axis title
+    """
+    # Titles of 60+ characters are rendered slightly smaller
+    if len(title) < 60:
+        title_font_size = 18
+    else:
+        title_font_size = 16
+
+    title_spec = {
+        "text": title,
+        "x": 0.5,
+        "xanchor": "center",
+        "font": {"size": title_font_size},
+    }
+
+    fig.update_layout(
+        title=title_spec,
+        xaxis_title=xaxis_title,
+        yaxis_title=yaxis_title,
+        hovermode="closest",
+        showlegend=True,
+        autosize=True,
+        clickmode="event+select",  # clicks both fire an event and select the point
+        **PLOTLY_DARK_THEME,
+    )
+
+    # Same faint grid on both axes
+    grid_style = {
+        "showgrid": True,
+        "gridwidth": 1,
+        "gridcolor": "rgba(128, 128, 128, 0.3)",
+    }
+    fig.update_xaxes(**grid_style)
+    fig.update_yaxes(**grid_style)
+
+
+def _add_target_line(fig, target_value, label, max_y):
+    """
+    Draw a dashed red vertical reference line as an extra trace.
+
+    Args:
+        fig: Plotly Figure object, updated in place
+        target_value: X-coordinate at which the line is drawn
+        label: Legend name and hover text of the line
+        max_y: Largest Y value of the data; the line extends 10% beyond it
+    """
+    line_top = max_y * 1.1
+    reference_line = go.Scatter(
+        x=[target_value] * 2,
+        y=[0, line_top],
+        mode="lines",
+        line={"color": "red", "width": 2, "dash": "dash"},
+        name=label,
+        hovertemplate=f"{label}<extra></extra>",
+    )
+    fig.add_trace(reference_line)
+
+
+def _configure_selection_style(fig, mode, selected_color="red", selected_size=16):
+    """
+    Style selected and unselected markers on the traces matching a mode.
+
+    Args:
+        fig: Plotly Figure object, updated in place
+        mode: Trace mode used as the selector (e.g., "markers+text")
+        selected_color: Marker color applied to selected points
+        selected_size: Marker size applied to selected points
+    """
+    # Traces that carry text labels are dimmed a little more when unselected
+    if "text" in mode:
+        dimmed_opacity = 0.4
+    else:
+        dimmed_opacity = 0.5
+
+    selected_style = {"marker": {"color": selected_color, "size": selected_size}}
+    unselected_style = {"marker": {"opacity": dimmed_opacity}}
+
+    fig.update_traces(
+        selected=selected_style,
+        unselected=unselected_style,
+        selector={"mode": mode},
+    )
+
+
+def plot_prefill_performance_interactive(
+    prefill_results: tuple, target_ttft: float
+) -> go.Figure:
+    """
+    Build the interactive prefill plot: throughput per GPU against TTFT.
+
+    Each GPU configuration is one labelled marker; a dashed vertical line
+    marks the target TTFT.
+
+    Args:
+        prefill_results: Tuple of (num_gpus_list, ttft_list, thpt_per_gpu_list)
+        target_ttft: Target TTFT in milliseconds (position of the reference line)
+
+    Returns:
+        Plotly Figure object for Gradio gr.Plot
+    """
+    gpu_counts, ttfts, thpts = prefill_results
+
+    point_labels = [f"{n} GPU(s)" for n in gpu_counts]
+    hover_text = (
+        "<b>%{text}</b><br>"
+        "TTFT: %{x:.2f} ms<br>"
+        "Throughput: %{y:.2f} tokens/s/GPU<br>"
+        "<extra></extra>"
+    )
+
+    fig = go.Figure()
+    config_points = go.Scatter(
+        x=ttfts,
+        y=thpts,
+        mode="markers+text",
+        marker={"size": 12, "color": "blue", "line": {"width": 2, "color": "darkblue"}},
+        text=point_labels,
+        textposition="top center",
+        textfont={"size": 10},
+        name="GPU Configurations",
+        hovertemplate=hover_text,
+        # Per-point payload: (num_gpus, ttft, throughput)
+        customdata=list(zip(gpu_counts, ttfts, thpts)),
+    )
+    fig.add_trace(config_points)
+
+    # Reference line height follows the data; 1000 is used when there is none
+    if thpts:
+        tallest = max(thpts)
+    else:
+        tallest = 1000
+    _add_target_line(fig, target_ttft, f"Target TTFT: {target_ttft} ms", tallest)
+
+    _configure_dark_theme(
+        fig,
+        "Prefill Performance",
+        "Time to First Token (ms)",
+        "Prefill Throughput per GPU (tokens/s/GPU)",
+    )
+
+    # Selected points turn red and grow
+    _configure_selection_style(
+        fig, "markers+text", selected_color="red", selected_size=16
+    )
+
+    return fig
+
+
+def plot_decode_performance_interactive(
+    decode_results: list, target_itl: float
+) -> go.Figure:
+    """
+    Create interactive Plotly plot for decode performance.
+
+    One line-with-markers trace is drawn per GPU configuration (throughput
+    per GPU against ITL), plus a dashed vertical line at the target ITL.
+
+    Args:
+        decode_results: List of tuples (num_gpus, itl_list, thpt_per_gpu_list)
+        target_itl: Target ITL in milliseconds (for reference line)
+
+    Returns:
+        Plotly Figure object for Gradio gr.Plot
+    """
+    fig = go.Figure()
+
+    # Plot each GPU configuration, cycling through the palette if needed
+    for idx, (num_gpus, itl_list, thpt_per_gpu_list) in enumerate(decode_results):
+        color = PLOTLY_COLORS[idx % len(PLOTLY_COLORS)]
+        # Per-point payload: [num_gpus, itl, throughput]
+        customdata = [
+            [num_gpus, itl, thpt] for itl, thpt in zip(itl_list, thpt_per_gpu_list)
+        ]
+
+        fig.add_trace(
+            go.Scatter(
+                x=itl_list,
+                y=thpt_per_gpu_list,
+                mode="lines+markers",
+                marker=dict(size=8, color=color),
+                line=dict(color=color, width=2),
+                name=f"{num_gpus} GPU(s)",
+                hovertemplate=f"<b>{num_gpus} GPU(s)</b><br>"
+                + "ITL: %{x:.2f} ms<br>"
+                + "Throughput: %{y:.2f} tokens/s/GPU<br>"
+                + "<extra></extra>",
+                customdata=customdata,
+            )
+        )
+
+    # Height of the target line follows the largest throughput across all
+    # configurations; 1000 is used when there are no data points at all.
+    max_thpt = max(
+        (thpt for _, _, thpt_list in decode_results for thpt in thpt_list),
+        default=1000,
+    )
+    _add_target_line(fig, target_itl, f"Target ITL: {target_itl} ms", max_thpt)
+
+    # Apply dark theme and configure layout
+    _configure_dark_theme(
+        fig,
+        "Decode Performance",
+        "Inter Token Latency (ms)",
+        "Decode Throughput per GPU (tokens/s/GPU)",
+    )
+
+    # Configure selection appearance for markers
+    _configure_selection_style(
+        fig, "lines+markers", selected_color="yellow", selected_size=12
+    )
+
+    return fig
+
+
+def plot_cost_sla_interactive(
+    isl: int,
+    osl: int,
+    prefill_results: tuple,
+    decode_results: list,
+    gpu_cost_per_hour: float = 3.0,
+) -> go.Figure:
+    """
+    Create interactive Plotly plot for cost vs SLA analysis.
+
+    The prefill and decode results are each reduced with ``compute_parato``.
+    For every remaining prefill point one curve is drawn over all remaining
+    decode points: X is ``1000 / ITL`` (tokens per user), Y is the combined
+    prefill + decode cost of 1000 requests.
+
+    Args:
+        isl: Input sequence length
+        osl: Output sequence length
+        prefill_results: Tuple of (num_gpus, ttft, thpt_per_gpu) for prefill
+        decode_results: List of tuples (num_gpus, itl_list, thpt_per_gpu_list) for decode
+        gpu_cost_per_hour: Cost per GPU per hour in dollars (default: 3.0)
+
+    Returns:
+        Plotly Figure object for Gradio gr.Plot
+    """
+    # Compute Pareto fronts
+    p_ttft, p_thpt = compute_parato(prefill_results[1], prefill_results[2])
+
+    all_itl, all_thpt = [], []
+    for decode_result in decode_results:
+        all_itl.extend(decode_result[1])
+        all_thpt.extend(decode_result[2])
+    d_itl, d_thpt = compute_parato(all_itl, all_thpt)
+
+    # Convert to numpy arrays for element-wise operations
+    p_ttft = np.array(p_ttft)
+    p_thpt = np.array(p_thpt)
+    d_itl = np.array(d_itl)
+    d_thpt = np.array(d_thpt)
+
+    # The decode side does not depend on the prefill point, so compute it once
+    tokens_per_user_array = 1000 / d_itl
+    decode_cost_array = osl * 1000 / d_thpt * gpu_cost_per_hour / 3600
+
+    fig = go.Figure()
+
+    for idx, (ttft, prefill_thpt) in enumerate(zip(p_ttft, p_thpt)):
+        # Total cost along this TTFT curve = shared decode cost + this prefill cost
+        prefill_cost = isl * 1000 / prefill_thpt * gpu_cost_per_hour / 3600
+        cost_array = decode_cost_array + prefill_cost
+
+        color = PLOTLY_COLORS[idx % len(PLOTLY_COLORS)]
+
+        # Per-point payload as plain Python floats:
+        # [ttft, prefill thpt, itl, decode thpt, tokens/user, cost]
+        customdata = [
+            [
+                float(ttft),
+                float(prefill_thpt),
+                float(itl),
+                float(decode_thpt),
+                float(tokens_per_user),
+                float(cost),
+            ]
+            for itl, decode_thpt, tokens_per_user, cost in zip(
+                d_itl, d_thpt, tokens_per_user_array, cost_array
+            )
+        ]
+
+        # Add line plot for this TTFT curve
+        fig.add_trace(
+            go.Scatter(
+                x=tokens_per_user_array,
+                y=cost_array,
+                mode="lines+markers",
+                marker=dict(size=10, symbol="x", color=color, line=dict(width=2)),
+                line=dict(color=color, width=2),
+                name=f"TTFT: {ttft:.2f}ms",
+                hovertemplate=f"<b>TTFT: {ttft:.2f}ms</b><br>"
+                + "Tokens/User: %{x:.2f}<br>"
+                + "Cost: $%{y:.3f}<br>"
+                + "<extra></extra>",
+                customdata=customdata,
+            )
+        )
+
+    # Apply dark theme and configure layout
+    title = f"Cost Per 1000 i{isl}o{osl} requests (GPU/hour = ${gpu_cost_per_hour:.2f}) Under Different SLA"
+    _configure_dark_theme(fig, title, "Tokens per User", "Cost ($)")
+
+    # Configure selection appearance for markers
+    _configure_selection_style(
+        fig, "lines+markers", selected_color="yellow", selected_size=14
+    )
+
+    return fig
diff --git a/benchmarks/profiler/webapp/ui/results.py b/benchmarks/profiler/webapp/ui/results.py
new file mode 100644
index 0000000000..2496f5cf1d
--- /dev/null
+++ b/benchmarks/profiler/webapp/ui/results.py
@@ -0,0 +1,77 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+"""
+UI components for results display in the Dynamo SLA Profiler webapp.
+
+This module provides functions to build the results tabs with plots and tables.
+"""
+
+import gradio as gr
+
+from benchmarks.profiler.webapp.core.constants import (
+    COST_TAB_DESCRIPTION,
+    DECODE_TAB_DESCRIPTION,
+    PREFILL_TAB_DESCRIPTION,
+)
+
+
+def create_results_tabs(empty_prefill_html, empty_decode_html, empty_cost_html):
+    """
+    Build the three result tabs, each holding a plot, a description and a table.
+
+    Args:
+        empty_prefill_html: Initial HTML for the prefill table
+        empty_decode_html: Initial HTML for the decode table
+        empty_cost_html: Initial HTML for the cost table
+
+    Returns:
+        Dictionary mapping component names to the created plot/table components
+    """
+
+    def _build_tab(tab_title, plot_id, description, table_id, initial_html):
+        # All tabs share one layout: plot, description, heading, table
+        with gr.Tab(tab_title):
+            plot = gr.Plot(
+                label=tab_title,
+                show_label=False,
+                elem_id=plot_id,
+            )
+            gr.Markdown(description)
+            gr.Markdown("#### Data Points")
+            table = gr.HTML(
+                value=initial_html,
+                elem_id=table_id,
+            )
+        return plot, table
+
+    prefill_plot, prefill_table = _build_tab(
+        "Prefill Performance",
+        "prefill_plot",
+        PREFILL_TAB_DESCRIPTION,
+        "prefill_table",
+        empty_prefill_html,
+    )
+    decode_plot, decode_table = _build_tab(
+        "Decode Performance",
+        "decode_plot",
+        DECODE_TAB_DESCRIPTION,
+        "decode_table",
+        empty_decode_html,
+    )
+    cost_plot, cost_table = _build_tab(
+        "Cost vs SLA",
+        "cost_plot",
+        COST_TAB_DESCRIPTION,
+        "cost_table",
+        empty_cost_html,
+    )
+
+    return dict(
+        prefill_plot=prefill_plot,
+        decode_plot=decode_plot,
+        cost_plot=cost_plot,
+        prefill_table=prefill_table,
+        decode_table=decode_table,
+        cost_table=cost_table,
+    )
diff --git a/benchmarks/profiler/webapp/ui/settings.py b/benchmarks/profiler/webapp/ui/settings.py
new file mode 100644
index 0000000000..2ea129ee00
--- /dev/null
+++ b/benchmarks/profiler/webapp/ui/settings.py
@@ -0,0 +1,192 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+"""
+UI components for settings panels in the Dynamo SLA Profiler webapp.
+
+This module provides functions to build the settings UI sections:
+- Model and backend configuration
+- Hardware configuration (GPUs, cost)
+- SLA parameters (ISL, OSL, TTFT, ITL)
+"""
+
+import gradio as gr
+from aiconfigurator.sdk.common import SupportedModels
+
+from benchmarks.profiler.webapp.core.constants import (
+    BACKEND_VERSIONS,
+    DEFAULT_CONFIG_YAML,
+    GPU_SYSTEMS,
+    INFERENCE_BACKENDS,
+    MAX_GPU_OPTIONS,
+    MIN_GPU_OPTIONS,
+)
+
+
+def create_model_settings():
+    """
+    Build the model/backend section of the settings panel.
+
+    Creates the model and backend dropdowns, the YAML config textbox, the
+    "Use AI Configurator" checkbox and the two AI Configurator dropdowns.
+
+    Returns:
+        Dictionary mapping component names to the created Gradio components
+    """
+    # The first supported model is preselected
+    supported_models = list(SupportedModels.keys())
+    default_model = supported_models[0]
+
+    with gr.Group():
+        with gr.Row():
+            aic_model_name = gr.Dropdown(
+                choices=supported_models,
+                value=default_model,
+                label="Model",
+                info="Model to profile",
+            )
+            backend = gr.Dropdown(
+                choices=INFERENCE_BACKENDS,
+                value="trtllm",
+                label="Backend",
+                info="Inference backend",
+            )
+
+        # The default YAML is only shown as placeholder text, not as the value
+        config_yaml = gr.Textbox(
+            label="Config (YAML)",
+            info="DynamoGraphDeployment YAML configuration",
+            placeholder=DEFAULT_CONFIG_YAML,
+            lines=5,
+        )
+
+        use_aic = gr.Checkbox(
+            value=True,
+            label="Use AI Configurator",
+            info="Use AI Configurator to estimate performance",
+        )
+
+        with gr.Row():
+            aic_backend = gr.Dropdown(
+                choices=INFERENCE_BACKENDS,
+                value="trtllm",
+                label="AI Configurator Backend",
+                info="Backend for AI Configurator estimation",
+                visible=True,
+            )
+            aic_backend_version = gr.Dropdown(
+                choices=BACKEND_VERSIONS["trtllm"],
+                value="0.20.0",
+                label="AI Configurator Backend Version",
+                info="Backend version for AI Configurator",
+                allow_custom_value=True,
+                visible=True,
+            )
+
+    return dict(
+        aic_model_name=aic_model_name,
+        backend=backend,
+        config_yaml=config_yaml,
+        use_aic=use_aic,
+        aic_backend=aic_backend,
+        aic_backend_version=aic_backend_version,
+    )
+
+
+def create_hardware_settings():
+    """
+    Create the hardware configuration UI.
+
+    Builds the GPU system dropdown, the cost-per-GPU-hour input, the min/max
+    GPUs-per-engine dropdowns and the GPUs-per-node input.
+
+    Returns:
+        Dictionary of Gradio components
+    """
+    with gr.Group():
+        with gr.Row():
+            aic_system = gr.Dropdown(
+                label="System",
+                choices=GPU_SYSTEMS,
+                value="H200_SXM",
+                info="Target GPU system",
+            )
+
+            gpu_cost_per_hour = gr.Number(
+                label="Cost per GPU Hour ($)",
+                value=3.0,
+                info="Cost per GPU per hour in dollars",
+            )
+
+        with gr.Row():
+            min_num_gpus_per_engine = gr.Dropdown(
+                label="Min GPUs per Engine",
+                choices=MIN_GPU_OPTIONS,
+                value=1,
+                info="Minimum number of GPUs (TP size)",
+            )
+
+            max_num_gpus_per_engine = gr.Dropdown(
+                label="Max GPUs per Engine",
+                choices=MAX_GPU_OPTIONS,
+                value=4,
+                info="Maximum number of GPUs (TP size)",
+            )
+
+        # A GPU count is integral: precision=0 rounds the input and yields an
+        # int, matching the other whole-number inputs in the settings panel.
+        num_gpus_per_node = gr.Number(
+            label="GPUs per Node",
+            value=8,
+            precision=0,
+            info="Number of GPUs per node (for MoE models)",
+        )
+
+    return {
+        "aic_system": aic_system,
+        "gpu_cost_per_hour": gpu_cost_per_hour,
+        "min_num_gpus_per_engine": min_num_gpus_per_engine,
+        "max_num_gpus_per_engine": max_num_gpus_per_engine,
+        "num_gpus_per_node": num_gpus_per_node,
+    }
+
+
+def create_sla_settings():
+    """
+    Build the SLA section of the settings panel.
+
+    Creates number inputs for ISL, OSL, max context length (all rounded to
+    integers via ``precision=0``) and for the TTFT and ITL targets.
+
+    Returns:
+        Dictionary mapping component names to the created Gradio components
+    """
+    with gr.Group():
+        # Sequence lengths
+        with gr.Row():
+            isl = gr.Number(
+                value=5000,
+                precision=0,
+                label="Input Sequence Length (ISL)",
+                info="Target input sequence length",
+            )
+            osl = gr.Number(
+                value=50,
+                precision=0,
+                label="Output Sequence Length (OSL)",
+                info="Target output sequence length",
+            )
+
+        # Context limit and first-token latency target
+        with gr.Row():
+            max_context_length = gr.Number(
+                value=8192,
+                precision=0,
+                label="Max Context Length",
+                info="Maximum context length supported by the model",
+            )
+            ttft = gr.Number(
+                value=50.0,
+                label="Target TTFT (ms)",
+                info="Target Time To First Token in milliseconds",
+            )
+
+        # Inter-token latency target
+        itl = gr.Number(
+            value=10.0,
+            label="Target ITL (ms)",
+            info="Target Inter Token Latency in milliseconds",
+        )
+
+    return dict(
+        isl=isl,
+        osl=osl,
+        max_context_length=max_context_length,
+        ttft=ttft,
+        itl=itl,
+    )
diff --git a/benchmarks/profiler/webapp/ui/tables.py b/benchmarks/profiler/webapp/ui/tables.py
new file mode 100644
index 0000000000..247cf5f7c4
--- /dev/null
+++ b/benchmarks/profiler/webapp/ui/tables.py
@@ -0,0 +1,187 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+"""
+Table building and data preparation utilities for the Dynamo SLA Profiler webapp.
+"""
+
+from numbers import Real
+
+import numpy as np
+
+from benchmarks.profiler.utils.parato import compute_parato
+from benchmarks.profiler.webapp.core.constants import (
+    COST_TABLE_HEADERS,
+    DECODE_TABLE_HEADERS,
+    PREFILL_TABLE_HEADERS,
+)
+
+
+def _format_cell(value):
+    """
+    Format a cell value for display in HTML table.
+
+    Booleans become a check or cross mark, integers (including numpy integer
+    scalars) are shown without decimals, other real numbers with three
+    decimals, and everything else via ``str()``.
+
+    Args:
+        value: The raw cell value
+
+    Returns:
+        The display string for the cell
+    """
+    from numbers import Integral
+
+    # bool must be tested first: it is itself an Integral
+    if isinstance(value, bool):
+        return "✅" if value else "❌"
+    # Integral (rather than int) also covers numpy integer scalars
+    if isinstance(value, Integral):
+        return f"{value}"
+    if isinstance(value, Real):
+        return f"{value:.3f}"
+    return str(value)
+
+
+def build_table_html(headers, rows):
+    """
+    Render headers and rows as an HTML table wrapped in a styled div.
+
+    When ``rows`` is empty the body is a single placeholder row spanning all
+    columns; otherwise every cell is rendered through ``_format_cell``.
+
+    Args:
+        headers: List of header strings
+        rows: List of row data (each row is a list of values)
+
+    Returns:
+        HTML string containing the table
+    """
+    header_cells = []
+    for header in headers:
+        header_cells.append(f"<th>{header}</th>")
+    header_html = "".join(header_cells)
+
+    if rows:
+        row_chunks = []
+        for row in rows:
+            cell_chunks = [f"<td>{_format_cell(cell)}</td>" for cell in row]
+            row_chunks.append("<tr>" + "".join(cell_chunks) + "</tr>")
+        body_html = "".join(row_chunks)
+    else:
+        # Placeholder row stretched across every column
+        body_html = (
+            f"<tr><td class='dynamo-table-empty' colspan='{len(headers)}'>"
+            "No data selected yet. Click points on the plot to populate this table."
+            "</td></tr>"
+        )
+
+    pieces = [
+        "<div class='dynamo-table-wrapper'>",
+        "<table class='dynamo-table'>",
+        f"<thead><tr>{header_html}</tr></thead>",
+        f"<tbody>{body_html}</tbody>",
+        "</table>",
+        "</div>",
+    ]
+    return "".join(pieces)
+
+
+def get_empty_tables():
+    """Return the (prefill, decode, cost) placeholder tables as HTML strings."""
+    header_sets = (PREFILL_TABLE_HEADERS, DECODE_TABLE_HEADERS, COST_TABLE_HEADERS)
+    return tuple(build_table_html(headers, []) for headers in header_sets)
+
+
+def prepare_prefill_table_data(prefill_results):
+    """
+    Turn prefill results into table rows of [num_gpus, ttft, throughput].
+
+    TTFT and throughput are rounded to three decimals.
+
+    Args:
+        prefill_results: Tuple of (num_gpus_list, ttft_list, thpt_per_gpu_list)
+
+    Returns:
+        List of rows for the table
+    """
+    gpu_counts, ttfts, thpts = prefill_results
+
+    table_rows = []
+    for gpu_count, ttft_ms, thpt_value in zip(gpu_counts, ttfts, thpts):
+        table_rows.append([gpu_count, round(ttft_ms, 3), round(thpt_value, 3)])
+    return table_rows
+
+
+def prepare_decode_table_data(decode_results):
+    """
+    Flatten decode results into table rows of [num_gpus, itl, throughput].
+
+    Each GPU configuration contributes one row per (itl, throughput) pair;
+    both values are rounded to three decimals.
+
+    Args:
+        decode_results: List of tuples (num_gpus, itl_list, thpt_list)
+
+    Returns:
+        List of rows for the table
+    """
+    return [
+        [gpu_count, round(itl_ms, 3), round(thpt_value, 3)]
+        for gpu_count, itls, thpts in decode_results
+        for itl_ms, thpt_value in zip(itls, thpts)
+    ]
+
+
+def prepare_cost_table_data(
+    isl, osl, prefill_results, decode_results, gpu_cost_per_hour
+):
+    """
+    Prepare table data for cost analysis.
+
+    The prefill and decode results are each reduced with ``compute_parato``;
+    one row is produced for every (prefill point, decode point) pair, in
+    prefill-major order. All values are rounded to three decimals.
+
+    Args:
+        isl: Input sequence length
+        osl: Output sequence length
+        prefill_results: Tuple of (num_gpus, ttft, thpt_per_gpu) for prefill
+        decode_results: List of tuples (num_gpus, itl_list, thpt_per_gpu_list) for decode
+        gpu_cost_per_hour: Cost per GPU per hour in dollars
+
+    Returns:
+        List of rows for the table; each row is
+        [ttft, prefill thpt, itl, decode thpt, tokens/user, cost]
+    """
+    # Compute Pareto fronts
+    p_ttft, p_thpt = compute_parato(prefill_results[1], prefill_results[2])
+
+    all_itl, all_thpt = [], []
+    for decode_result in decode_results:
+        all_itl.extend(decode_result[1])
+        all_thpt.extend(decode_result[2])
+    d_itl, d_thpt = compute_parato(all_itl, all_thpt)
+
+    # Convert to numpy arrays for element-wise operations
+    p_ttft = np.array(p_ttft)
+    p_thpt = np.array(p_thpt)
+    d_itl = np.array(d_itl)
+    d_thpt = np.array(d_thpt)
+
+    # The decode side does not depend on the prefill point: compute and round
+    # it once instead of once per prefill point.
+    tokens_per_user_array = 1000 / d_itl
+    decode_cost_array = osl * 1000 / d_thpt * gpu_cost_per_hour / 3600
+    decode_cells = [
+        (round(float(itl), 3), round(float(thpt), 3), round(float(tokens), 3))
+        for itl, thpt, tokens in zip(d_itl, d_thpt, tokens_per_user_array)
+    ]
+
+    table_data = []
+    for ttft, prefill_thpt in zip(p_ttft, p_thpt):
+        prefill_cost = isl * 1000 / prefill_thpt * gpu_cost_per_hour / 3600
+        cost_array = decode_cost_array + prefill_cost
+        ttft_cell = round(float(ttft), 3)
+        prefill_thpt_cell = round(float(prefill_thpt), 3)
+
+        for (itl_cell, thpt_cell, tokens_cell), cost in zip(decode_cells, cost_array):
+            table_data.append(
+                [
+                    ttft_cell,
+                    prefill_thpt_cell,
+                    itl_cell,
+                    thpt_cell,
+                    tokens_cell,
+                    round(float(cost), 3),
+                ]
+            )
+
+    return table_data
+
+
+def build_all_tables(prefill_results, decode_results, isl, osl, gpu_cost_per_hour):
+    """
+    Render the prefill, decode and cost tables from profiling results.
+
+    Args:
+        prefill_results: Prefill profiling results
+        decode_results: Decode profiling results
+        isl: Input sequence length
+        osl: Output sequence length
+        gpu_cost_per_hour: Cost per GPU per hour
+
+    Returns:
+        Tuple of (prefill_table_html, decode_table_html, cost_table_html)
+    """
+    # Prepare all row data first, then render each table with its headers
+    prefill_rows = prepare_prefill_table_data(prefill_results)
+    decode_rows = prepare_decode_table_data(decode_results)
+    cost_rows = prepare_cost_table_data(
+        isl, osl, prefill_results, decode_results, gpu_cost_per_hour
+    )
+
+    tables = (
+        (PREFILL_TABLE_HEADERS, prefill_rows),
+        (DECODE_TABLE_HEADERS, decode_rows),
+        (COST_TABLE_HEADERS, cost_rows),
+    )
+    return tuple(build_table_html(headers, rows) for headers, rows in tables)
diff --git a/benchmarks/pyproject.toml b/benchmarks/pyproject.toml
index d99b7c611c..f1aeb8fba8 100644
--- a/benchmarks/pyproject.toml
+++ b/benchmarks/pyproject.toml
@@ -48,6 +48,8 @@ dependencies = [
     "types-tabulate",
     "transformers",
     "pytest-mypy",
+    "gradio>=5.49.1",
+    "plotly>=6.4.0",
 ]
 
 [project.scripts]