From 7e90a738389bf718091663f2971fc44adc622cea Mon Sep 17 00:00:00 2001 From: dalthecow Date: Tue, 4 Mar 2025 19:44:03 -0500 Subject: [PATCH 1/9] set up data for ui --- src/guidellm/core/result.py | 16 +++ src/guidellm/main.py | 4 +- src/guidellm/utils/__init__.py | 2 + src/guidellm/utils/generate_ui_data.py | 138 +++++++++++++++++++++++++ 4 files changed, 159 insertions(+), 1 deletion(-) create mode 100644 src/guidellm/utils/generate_ui_data.py diff --git a/src/guidellm/core/result.py b/src/guidellm/core/result.py index f218784c..8942b675 100644 --- a/src/guidellm/core/result.py +++ b/src/guidellm/core/result.py @@ -403,6 +403,22 @@ def output_token_throughput(self) -> float: return total_tokens / self.duration + @property + def output_token_throughput_distribution(self) -> Distribution: + """ + Get the distribution for output token throughput. + + :return: The distribution of output token throughput. + :rtype: Distribution + """ + throughputs = [] + for r in self.results: + duration = (r.end_time or 0) - (r.start_time or 0) + if duration > 0: + throughputs.append(r.output_token_count / duration) + + return Distribution(data=throughputs) + @property def prompt_token_distribution(self) -> Distribution: """ diff --git a/src/guidellm/main.py b/src/guidellm/main.py index 4016ecec..01b744d2 100644 --- a/src/guidellm/main.py +++ b/src/guidellm/main.py @@ -14,6 +14,7 @@ ) from guidellm.request.base import RequestGenerator from guidellm.utils import BenchmarkReportProgress, cli_params +from guidellm.utils.generate_ui_data import generate_ui_api_data __all__ = ["generate_benchmark_report"] @@ -183,7 +184,6 @@ def generate_benchmark_report_cli( cont_refresh_table=enable_continuous_refresh, ) - def generate_benchmark_report( target: str, backend: BackendEnginePublic, @@ -285,6 +285,8 @@ def generate_benchmark_report( ) report = asyncio.run(_run_executor_for_result(executor)) + generate_ui_api_data(report) + # Save and print report guidance_report = GuidanceReport() guidance_report.benchmarks.append(report) diff --git a/src/guidellm/utils/__init__.py b/src/guidellm/utils/__init__.py index 2fdd8ca8..96c02049 100644 --- a/src/guidellm/utils/__init__.py +++ b/src/guidellm/utils/__init__.py @@ -1,4 +1,5 @@ from .injector import create_report, inject_data +from .generate_ui_data import generate_ui_api_data from .progress import BenchmarkReportProgress from .text import ( clean_text, @@ -24,6 +25,7 @@ "clean_text", "create_report", "filter_text", + "generate_ui_api_data", "inject_data", "is_path", "is_path_like", diff --git a/src/guidellm/utils/generate_ui_data.py b/src/guidellm/utils/generate_ui_data.py new file mode 100644 index 00000000..e9415cc7 --- /dev/null +++ b/src/guidellm/utils/generate_ui_data.py @@ -0,0 +1,138 @@ +import os +import json +import random +from typing import Any, Dict, List +from guidellm.core.distribution import Distribution +from guidellm.core import TextGenerationBenchmarkReport, TextGenerationBenchmark + +def generate_metric_report(dist: Distribution, metric_label: str, n_buckets: int = 18): + total = dist.__len__() + mean = dist.mean + median = dist.median + minv = dist.min + maxv = dist.max + std_dev = dist.std_deviation + + pvals = dist.percentiles([50, 90, 95, 99]) + + percentile_list = [ + {"percentile": "p50", "value": pvals[0]}, + {"percentile": "p90", "value": pvals[1]}, + {"percentile": "p95", "value": pvals[2]}, + {"percentile": "p99", "value": pvals[3]}, + ] + + if dist.range == 0: + buckets = [{"value": minv, "count": total}] + bucket_width = 0 + else: + bucket_width 
= dist.range / n_buckets + bucket_counts = [0] * n_buckets + + for val in dist.data: + + idx = int((val - minv) // bucket_width) + if idx == n_buckets: + idx = n_buckets - 1 + bucket_counts[idx] += 1 + + buckets = [] + for i, count in enumerate(bucket_counts): + bucket_start = minv + i * bucket_width + buckets.append({ + "value": bucket_start, + "count": count + }) + + return { + metric_label: { + "statistics": { + "total": total, + "mean": mean, + "median": median, + "min": minv, + "max": maxv, + "std": std_dev, + }, + "percentiles": percentile_list, + "buckets": buckets, + "bucketWidth": bucket_width, + } + } + +def generate_run_info(report: TextGenerationBenchmarkReport) -> Dict[str, Any]: + timestamp = max(map(lambda bm: bm.end_time, report.benchmarks)) + return { + "model": { + "name": report.args.get('model', 'N/A'), + "size": 0 + }, + "task": "N/A", + "dataset": "N/A", + "timestamp": timestamp + } + +def generate_workload_details(report: TextGenerationBenchmarkReport) -> Dict[str, Any]: + all_prompt_token_data = [data for benchmark in report.benchmarks for data in benchmark.prompt_token_distribution.data] + all_prompt_token_distribution = Distribution(data=all_prompt_token_data) + all_output_token_data = [data for benchmark in report.benchmarks for data in benchmark.output_token_distribution.data] + all_output_token_distribution = Distribution(data=all_output_token_data) + + prompt_token_data = generate_metric_report(all_prompt_token_distribution, "tokenDistributions") + prompt_token_samples = [result.prompt for benchmark in report.benchmarks for result in benchmark.results] + sample_prompts = random.sample(prompt_token_samples, min(5, len(prompt_token_samples))) + output_token_data = generate_metric_report(all_output_token_distribution, "tokenDistributions") + output_token_samples = [result.output for benchmark in report.benchmarks for result in benchmark.results] + sample_outputs = random.sample(output_token_samples, min(5, len(output_token_samples))) + return { + "prompts": { + "samples": sample_prompts, + **prompt_token_data + }, + "generation": { + "samples": sample_outputs, + **output_token_data + }, + "server": { + "target": report.args.get('target', 'N/A') + } + } + +def generate_benchmark_json(bm: TextGenerationBenchmark) -> Dict[str, Any]: + ttft_dist_ms = Distribution(data=[val * 1000 for val in bm.ttft_distribution.data]) + ttft_data = generate_metric_report(ttft_dist_ms, 'ttft') + tpot_dist_ms = Distribution(data=[val * 1000 for val in bm.itl_distribution.data]) + tpot_data = generate_metric_report(tpot_dist_ms, 'tpot') + throughput_dist_ms = Distribution(data=[val * 1000 for val in bm.output_token_throughput_distribution.data]) + throughput_data = generate_metric_report(throughput_dist_ms, 'throughput') + latency_dist_ms = Distribution(data=[val * 1000 for val in bm.request_latency_distribution.data]) + time_per_request_data = generate_metric_report(latency_dist_ms, 'timePerRequest') + return { + "requestsPerSecond": bm.completed_request_rate, + **ttft_data, + **tpot_data, + **throughput_data, + **time_per_request_data, + } + +def generate_benchmarks_json(benchmarks: List[TextGenerationBenchmark]): + benchmark_report_json = [] + for benchmark in benchmarks: + benchmarks_report = generate_benchmark_json(benchmark) + benchmark_report_json.append(benchmarks_report) + return benchmark_report_json + +def generate_ui_api_data(report: TextGenerationBenchmarkReport): + run_info_json = generate_run_info(report) + workload_details_json = generate_workload_details(report) + 
benchmarks_json = generate_benchmarks_json(report.benchmarks) + os.makedirs("ben_test", exist_ok=True) + # generate json files based off of api specs, https://codepen.io/dalthecow/pen/bNGVQbq, for consumption by UI + with open("ben_test/run_info.json", "w") as f: + json.dump(run_info_json, f, indent=2) + with open("ben_test/workload_details.json", "w") as f: + json.dump(workload_details_json, f, indent=2) + with open("ben_test/benchmarks.json", "w") as f: + json.dump(benchmarks_json, f, indent=2) + + print("Reports saved to run_info.json, workload_details.json, benchmarks.json") \ No newline at end of file From f870c2000bfd5ddaa6624e240e3631071f4c2805 Mon Sep 17 00:00:00 2001 From: dalthecow Date: Tue, 4 Mar 2025 21:17:34 -0500 Subject: [PATCH 2/9] add in request over time calculation --- src/guidellm/utils/generate_ui_data.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/guidellm/utils/generate_ui_data.py b/src/guidellm/utils/generate_ui_data.py index e9415cc7..be298619 100644 --- a/src/guidellm/utils/generate_ui_data.py +++ b/src/guidellm/utils/generate_ui_data.py @@ -72,6 +72,18 @@ def generate_run_info(report: TextGenerationBenchmarkReport) -> Dict[str, Any]: "timestamp": timestamp } +def generate_request_over_time_data(benchmarks: List[TextGenerationBenchmark]) -> List[Dict[str, Any]]: + request_over_time_results = [] + for benchmark in benchmarks: + # compare benchmark start time to text generation result end time + all_result_end_times = [result.end_time for result in benchmark.results if result.end_time is not None] + request_over_time_values = list(map(lambda time: time - benchmark.start_time, all_result_end_times)) + request_distribution = Distribution(data=request_over_time_values) + result = generate_metric_report(request_distribution, "requestsOverTime") + request_over_time_results.append(result["requestsOverTime"]) + return request_over_time_results + + def generate_workload_details(report: TextGenerationBenchmarkReport) -> Dict[str, Any]: all_prompt_token_data = [data for benchmark in report.benchmarks for data in benchmark.prompt_token_distribution.data] all_prompt_token_distribution = Distribution(data=all_prompt_token_data) @@ -84,6 +96,9 @@ def generate_workload_details(report: TextGenerationBenchmarkReport) -> Dict[str output_token_data = generate_metric_report(all_output_token_distribution, "tokenDistributions") output_token_samples = [result.output for benchmark in report.benchmarks for result in benchmark.results] sample_outputs = random.sample(output_token_samples, min(5, len(output_token_samples))) + + request_over_time_results = generate_request_over_time_data(report.benchmarks) + return { "prompts": { "samples": sample_prompts, @@ -93,6 +108,7 @@ def generate_workload_details(report: TextGenerationBenchmarkReport) -> Dict[str "samples": sample_outputs, **output_token_data }, + "requestsOverTime": request_over_time_results, "server": { "target": report.args.get('target', 'N/A') } From d1bbc0c1b2b8f7a2a0183ffb25e27a83ae18aa10 Mon Sep 17 00:00:00 2001 From: dalthecow Date: Fri, 7 Mar 2025 15:12:19 -0500 Subject: [PATCH 3/9] update data generation to better handle sample strings, other small fixes --- src/guidellm/utils/generate_ui_data.py | 35 ++++++++++++++++++-------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/src/guidellm/utils/generate_ui_data.py b/src/guidellm/utils/generate_ui_data.py index be298619..97ad8118 100644 --- a/src/guidellm/utils/generate_ui_data.py +++ b/src/guidellm/utils/generate_ui_data.py @@ -68,7 
+68,9 @@ def generate_run_info(report: TextGenerationBenchmarkReport) -> Dict[str, Any]: "size": 0 }, "task": "N/A", - "dataset": "N/A", + "dataset": { + "name": "N/A" + }, "timestamp": timestamp } @@ -93,10 +95,13 @@ def generate_workload_details(report: TextGenerationBenchmarkReport) -> Dict[str prompt_token_data = generate_metric_report(all_prompt_token_distribution, "tokenDistributions") prompt_token_samples = [result.prompt for benchmark in report.benchmarks for result in benchmark.results] sample_prompts = random.sample(prompt_token_samples, min(5, len(prompt_token_samples))) + sample_prompts = list(map(lambda prompt: prompt.replace("\n", " ").replace("\"", "'"), sample_prompts)) output_token_data = generate_metric_report(all_output_token_distribution, "tokenDistributions") output_token_samples = [result.output for benchmark in report.benchmarks for result in benchmark.results] sample_outputs = random.sample(output_token_samples, min(5, len(output_token_samples))) + sample_outputs = list(map(lambda output: output.replace("\n", " ").replace("\"", "'"), sample_outputs)) + request_over_time_results = generate_request_over_time_data(report.benchmarks) return { @@ -104,7 +109,7 @@ def generate_workload_details(report: TextGenerationBenchmarkReport) -> Dict[str "samples": sample_prompts, **prompt_token_data }, - "generation": { + "generations": { "samples": sample_outputs, **output_token_data }, @@ -138,17 +143,25 @@ def generate_benchmarks_json(benchmarks: List[TextGenerationBenchmark]): benchmark_report_json.append(benchmarks_report) return benchmark_report_json +def generate_js_variable(variable_name: str, data: dict) -> str: + json_data = json.dumps(data, indent=2) + return f'`window.{variable_name} = {json_data};`' # Wrap in quotes + def generate_ui_api_data(report: TextGenerationBenchmarkReport): - run_info_json = generate_run_info(report) - workload_details_json = generate_workload_details(report) - benchmarks_json = generate_benchmarks_json(report.benchmarks) + run_info_data = generate_run_info(report) + workload_details_data = generate_workload_details(report) + benchmarks_data = generate_benchmarks_json(report.benchmarks) + run_info_script = generate_js_variable("run_info", run_info_data) + workload_details_script = generate_js_variable("workload_details", workload_details_data) + benchmarks_script = generate_js_variable("benchmarks", benchmarks_data) + os.makedirs("ben_test", exist_ok=True) # generate json files based off of api specs, https://codepen.io/dalthecow/pen/bNGVQbq, for consumption by UI - with open("ben_test/run_info.json", "w") as f: - json.dump(run_info_json, f, indent=2) - with open("ben_test/workload_details.json", "w") as f: - json.dump(workload_details_json, f, indent=2) - with open("ben_test/benchmarks.json", "w") as f: - json.dump(benchmarks_json, f, indent=2) + with open("ben_test/run_info.js", "w") as f: + f.write(run_info_script) + with open("ben_test/workload_details.js", "w") as f: + f.write(workload_details_script) + with open("ben_test/benchmarks.js", "w") as f: + f.write(benchmarks_script) print("Reports saved to run_info.json, workload_details.json, benchmarks.json") \ No newline at end of file From d59cadafee7c73ce2fbca5829f599364ae4ed3bb Mon Sep 17 00:00:00 2001 From: dalthecow Date: Tue, 11 Mar 2025 13:46:15 -0400 Subject: [PATCH 4/9] hack changes together to get values for request over time data, wip --- src/guidellm/utils/generate_ui_data.py | 74 +++++++++++++++++++++++--- 1 file changed, 68 insertions(+), 6 deletions(-) diff --git 
a/src/guidellm/utils/generate_ui_data.py b/src/guidellm/utils/generate_ui_data.py index 97ad8118..8b1c7f67 100644 --- a/src/guidellm/utils/generate_ui_data.py +++ b/src/guidellm/utils/generate_ui_data.py @@ -1,6 +1,7 @@ import os import json import random +import math from typing import Any, Dict, List from guidellm.core.distribution import Distribution from guidellm.core import TextGenerationBenchmarkReport, TextGenerationBenchmark @@ -74,7 +75,12 @@ def generate_run_info(report: TextGenerationBenchmarkReport) -> Dict[str, Any]: "timestamp": timestamp } +def linearly_interpolate_value(target_input, lower_input, lower_output, upperInput, upper_output): + fraction = (target_input - lower_input) / (upperInput - lower_input) + return lower_output + fraction * (upper_output - lower_output) + def generate_request_over_time_data(benchmarks: List[TextGenerationBenchmark]) -> List[Dict[str, Any]]: + request_over_time_results = [] for benchmark in benchmarks: # compare benchmark start time to text generation result end time @@ -82,8 +88,59 @@ def generate_request_over_time_data(benchmarks: List[TextGenerationBenchmark]) - request_over_time_values = list(map(lambda time: time - benchmark.start_time, all_result_end_times)) request_distribution = Distribution(data=request_over_time_values) result = generate_metric_report(request_distribution, "requestsOverTime") - request_over_time_results.append(result["requestsOverTime"]) - return request_over_time_results + result["requestsPerSecond"] = benchmark.completed_request_rate + request_over_time_results.append(result) + + if len(benchmarks) == 1: + return request_over_time_results + + request_over_time_raw = [] + sorted_bm = sorted(benchmarks, key=lambda bm: bm.completed_request_rate) + for benchmark in sorted_bm: + # compare benchmark start time to text generation result end time + all_result_end_times = [result.end_time for result in benchmark.results if result.end_time is not None] + request_over_time_values = list(map(lambda time: time - benchmark.start_time, all_result_end_times)) + request_at_rps = { "rps": benchmark.completed_request_rate, "requests_over_time": request_over_time_values } + request_over_time_raw.append(request_at_rps) + + rps_values = [request_obj["rps"] for request_obj in request_over_time_raw] + rps_range = list(range(math.ceil(min(rps_values)), math.ceil(max(rps_values)))) + interpolated_request_values = [] + lower_rps_index = 0 + for rps in rps_range: + if rps > rps_values[lower_rps_index + 1]: lower_rps_index += 1 + if rps == rps_values[lower_rps_index]: + interpolated_request_values.append({ + "requests_per_second": rps, + "requests_over_time": request_over_time_raw[lower_rps_index]["requests_over_time"][:] + }) + lower_rps_index += 1 + elif rps < rps_values[lower_rps_index + 1]: + interpolated_requests_at_new_rps = [] + for i in range(len(request_over_time_raw[lower_rps_index]["requests_over_time"])): + lower_request = request_over_time_raw[lower_rps_index]["requests_over_time"][i] + upper_request = request_over_time_raw[lower_rps_index + 1]["requests_over_time"][i] + new_value = linearly_interpolate_value(rps, rps_values[lower_rps_index], lower_request, rps_values[lower_rps_index + 1], upper_request) + interpolated_requests_at_new_rps.append(new_value) + interpolated_request_values.append({ "requests_per_second": rps, "requests_over_time": interpolated_requests_at_new_rps }) + elif rps > rps_values[lower_rps_index + 1]: + while rps > rps_values[lower_rps_index + 1]: + lower_rps_index += 1 + interpolated_requests_at_new_rps = 
[] + for i in range(len(request_over_time_raw[lower_rps_index]["requests_over_time"])): + lower_request = request_over_time_raw[lower_rps_index]["requests_over_time"][i] + upper_request = request_over_time_raw[lower_rps_index + 1]["requests_over_time"][i] + new_value = linearly_interpolate_value(rps, rps_values[lower_rps_index], lower_request, rps_values[lower_rps_index + 1], upper_request) + interpolated_requests_at_new_rps.append(new_value) + interpolated_request_values.append({ "requests_per_second": rps, "requests_over_time": interpolated_requests_at_new_rps }) + interpolated_request_over_time_results = [] + for request_value in interpolated_request_values: + request_distribution = Distribution(data=request_value["requests_over_time"]) + result = generate_metric_report(request_distribution, "requestsOverTime") + result["requestsPerSecond"] = request_value["requests_per_second"] + interpolated_request_over_time_results.append(result) + + return interpolated_request_over_time_results def generate_workload_details(report: TextGenerationBenchmarkReport) -> Dict[str, Any]: @@ -93,13 +150,18 @@ def generate_workload_details(report: TextGenerationBenchmarkReport) -> Dict[str all_output_token_distribution = Distribution(data=all_output_token_data) prompt_token_data = generate_metric_report(all_prompt_token_distribution, "tokenDistributions") - prompt_token_samples = [result.prompt for benchmark in report.benchmarks for result in benchmark.results] - sample_prompts = random.sample(prompt_token_samples, min(5, len(prompt_token_samples))) - sample_prompts = list(map(lambda prompt: prompt.replace("\n", " ").replace("\"", "'"), sample_prompts)) output_token_data = generate_metric_report(all_output_token_distribution, "tokenDistributions") + + prompt_token_samples = [result.prompt for benchmark in report.benchmarks for result in benchmark.results] output_token_samples = [result.output for benchmark in report.benchmarks for result in benchmark.results] - sample_outputs = random.sample(output_token_samples, min(5, len(output_token_samples))) + num_samples = min(5, len(prompt_token_samples), len(output_token_samples)) + sample_indices = random.sample(range(len(prompt_token_samples)), num_samples) + + sample_prompts = [prompt_token_samples[i] for i in sample_indices] + sample_prompts = list(map(lambda prompt: prompt.replace("\n", " ").replace("\"", "'"), sample_prompts)) + + sample_outputs = [output_token_samples[i] for i in sample_indices] sample_outputs = list(map(lambda output: output.replace("\n", " ").replace("\"", "'"), sample_outputs)) request_over_time_results = generate_request_over_time_data(report.benchmarks) From 76fc2b42b7b65a1c6cfa3714d0d01ff1044d794a Mon Sep 17 00:00:00 2001 From: dalthecow Date: Wed, 19 Mar 2025 16:14:28 -0400 Subject: [PATCH 5/9] add interpolation of benchmark metrics by rps, remove interpolation of request over time data and use raw, refactor and test interpolation functionality --- src/guidellm/utils/__init__.py | 7 +- src/guidellm/utils/generate_ui_data.py | 236 ++++++++++++++++--------- src/guidellm/utils/interpolation.py | 89 ++++++++++ tests/unit/utils/test_interpolation.py | 26 +++ 4 files changed, 277 insertions(+), 81 deletions(-) create mode 100644 src/guidellm/utils/interpolation.py create mode 100644 tests/unit/utils/test_interpolation.py diff --git a/src/guidellm/utils/__init__.py b/src/guidellm/utils/__init__.py index 96c02049..81cf580f 100644 --- a/src/guidellm/utils/__init__.py +++ b/src/guidellm/utils/__init__.py @@ -1,5 +1,6 @@ -from .injector import 
create_report, inject_data from .generate_ui_data import generate_ui_api_data +from .interpolation import linear_interpolate, interpolate_measurements, interpolate_data_points, stretch_list +from .injector import create_report, inject_data from .progress import BenchmarkReportProgress from .text import ( clean_text, @@ -27,9 +28,12 @@ "filter_text", "generate_ui_api_data", "inject_data", + "interpolate_data_points", + "interpolate_measurements", "is_path", "is_path_like", "is_url", + "linear_interpolate", "load_text", "load_text_lines", "load_transformers_dataset", @@ -39,4 +43,5 @@ "resolve_transformers_dataset_split", "split_lines_by_punctuation", "split_text", + "stretch_list", ] diff --git a/src/guidellm/utils/generate_ui_data.py b/src/guidellm/utils/generate_ui_data.py index 8b1c7f67..a450fd94 100644 --- a/src/guidellm/utils/generate_ui_data.py +++ b/src/guidellm/utils/generate_ui_data.py @@ -5,6 +5,7 @@ from typing import Any, Dict, List from guidellm.core.distribution import Distribution from guidellm.core import TextGenerationBenchmarkReport, TextGenerationBenchmark +from guidellm.utils.interpolation import interpolate_data_points def generate_metric_report(dist: Distribution, metric_label: str, n_buckets: int = 18): total = dist.__len__() @@ -61,8 +62,8 @@ def generate_metric_report(dist: Distribution, metric_label: str, n_buckets: int } } -def generate_run_info(report: TextGenerationBenchmarkReport) -> Dict[str, Any]: - timestamp = max(map(lambda bm: bm.end_time, report.benchmarks)) +def generate_run_info(report: TextGenerationBenchmarkReport, benchmarks: List[TextGenerationBenchmark]) -> Dict[str, Any]: + timestamp = max(bm.start_time for bm in benchmarks if bm.start_time is not None) return { "model": { "name": report.args.get('model', 'N/A'), @@ -80,91 +81,109 @@ def linearly_interpolate_value(target_input, lower_input, lower_output, upperInp return lower_output + fraction * (upper_output - lower_output) def generate_request_over_time_data(benchmarks: List[TextGenerationBenchmark]) -> List[Dict[str, Any]]: + filtered_benchmarks = filter(lambda bm: bm.start_time is not None, benchmarks) + sorted_benchmarks = list(sorted(filtered_benchmarks, key=lambda bm: bm.start_time)) + min_start_time = sorted_benchmarks[0].start_time - request_over_time_results = [] - for benchmark in benchmarks: - # compare benchmark start time to text generation result end time - all_result_end_times = [result.end_time for result in benchmark.results if result.end_time is not None] - request_over_time_values = list(map(lambda time: time - benchmark.start_time, all_result_end_times)) - request_distribution = Distribution(data=request_over_time_values) - result = generate_metric_report(request_distribution, "requestsOverTime") - result["requestsPerSecond"] = benchmark.completed_request_rate - request_over_time_results.append(result) + all_request_times = [ + result.start_time - min_start_time + for benchmark in sorted_benchmarks + for result in benchmark.results + if result.start_time is not None + ] - if len(benchmarks) == 1: - return request_over_time_results + request_distribution = Distribution(data=all_request_times) + final_result = generate_metric_report(request_distribution, "requestsOverTime") + return { "numBenchmarks": len(sorted_benchmarks), **final_result } + +# def generate_request_over_time_data_per_benchmark(benchmarks: List[TextGenerationBenchmark]) -> List[Dict[str, Any]]: + +# request_over_time_results = [] +# for benchmark in benchmarks: +# # compare benchmark start time to text 
generation result end time +# all_result_end_times = [result.end_time for result in benchmark.results if result.end_time is not None] +# request_over_time_values = list(map(lambda time: time - benchmark.start_time, all_result_end_times)) +# request_distribution = Distribution(data=request_over_time_values) +# result = generate_metric_report(request_distribution, "requestsOverTime") +# result["requestsPerSecond"] = benchmark.completed_request_rate +# request_over_time_results.append(result) + +# if len(benchmarks) == 1: +# return request_over_time_results - request_over_time_raw = [] - sorted_bm = sorted(benchmarks, key=lambda bm: bm.completed_request_rate) - for benchmark in sorted_bm: - # compare benchmark start time to text generation result end time - all_result_end_times = [result.end_time for result in benchmark.results if result.end_time is not None] - request_over_time_values = list(map(lambda time: time - benchmark.start_time, all_result_end_times)) - request_at_rps = { "rps": benchmark.completed_request_rate, "requests_over_time": request_over_time_values } - request_over_time_raw.append(request_at_rps) - - rps_values = [request_obj["rps"] for request_obj in request_over_time_raw] - rps_range = list(range(math.ceil(min(rps_values)), math.ceil(max(rps_values)))) - interpolated_request_values = [] - lower_rps_index = 0 - for rps in rps_range: - if rps > rps_values[lower_rps_index + 1]: lower_rps_index += 1 - if rps == rps_values[lower_rps_index]: - interpolated_request_values.append({ - "requests_per_second": rps, - "requests_over_time": request_over_time_raw[lower_rps_index]["requests_over_time"][:] - }) - lower_rps_index += 1 - elif rps < rps_values[lower_rps_index + 1]: - interpolated_requests_at_new_rps = [] - for i in range(len(request_over_time_raw[lower_rps_index]["requests_over_time"])): - lower_request = request_over_time_raw[lower_rps_index]["requests_over_time"][i] - upper_request = request_over_time_raw[lower_rps_index + 1]["requests_over_time"][i] - new_value = linearly_interpolate_value(rps, rps_values[lower_rps_index], lower_request, rps_values[lower_rps_index + 1], upper_request) - interpolated_requests_at_new_rps.append(new_value) - interpolated_request_values.append({ "requests_per_second": rps, "requests_over_time": interpolated_requests_at_new_rps }) - elif rps > rps_values[lower_rps_index + 1]: - while rps > rps_values[lower_rps_index + 1]: - lower_rps_index += 1 - interpolated_requests_at_new_rps = [] - for i in range(len(request_over_time_raw[lower_rps_index]["requests_over_time"])): - lower_request = request_over_time_raw[lower_rps_index]["requests_over_time"][i] - upper_request = request_over_time_raw[lower_rps_index + 1]["requests_over_time"][i] - new_value = linearly_interpolate_value(rps, rps_values[lower_rps_index], lower_request, rps_values[lower_rps_index + 1], upper_request) - interpolated_requests_at_new_rps.append(new_value) - interpolated_request_values.append({ "requests_per_second": rps, "requests_over_time": interpolated_requests_at_new_rps }) - interpolated_request_over_time_results = [] - for request_value in interpolated_request_values: - request_distribution = Distribution(data=request_value["requests_over_time"]) - result = generate_metric_report(request_distribution, "requestsOverTime") - result["requestsPerSecond"] = request_value["requests_per_second"] - interpolated_request_over_time_results.append(result) - - return interpolated_request_over_time_results - - -def generate_workload_details(report: TextGenerationBenchmarkReport) -> 
Dict[str, Any]: - all_prompt_token_data = [data for benchmark in report.benchmarks for data in benchmark.prompt_token_distribution.data] +# request_over_time_raw = [] +# sorted_bm = sorted(benchmarks, key=lambda bm: bm.completed_request_rate) +# for benchmark in sorted_bm: +# # compare benchmark start time to text generation result end time +# all_result_end_times = [result.end_time for result in benchmark.results if result.end_time is not None] +# request_over_time_values = list(map(lambda time: time - benchmark.start_time, all_result_end_times)) +# request_at_rps = { "rps": benchmark.completed_request_rate, "requests_over_time": request_over_time_values } +# request_over_time_raw.append(request_at_rps) + +# rps_values = [request_obj["rps"] for request_obj in request_over_time_raw] +# rps_range = list(range(math.ceil(min(rps_values)), math.ceil(max(rps_values)))) +# interpolated_request_values = [] +# lower_rps_index = 0 +# for rps in rps_range: +# if rps > rps_values[lower_rps_index + 1]: lower_rps_index += 1 +# if rps == rps_values[lower_rps_index]: +# interpolated_request_values.append({ +# "requests_per_second": rps, +# "requests_over_time": request_over_time_raw[lower_rps_index]["requests_over_time"][:] +# }) +# lower_rps_index += 1 +# elif rps < rps_values[lower_rps_index + 1]: +# interpolated_requests_at_new_rps = [] +# for i in range(len(request_over_time_raw[lower_rps_index]["requests_over_time"])): +# lower_request = request_over_time_raw[lower_rps_index]["requests_over_time"][i] +# upper_request = request_over_time_raw[lower_rps_index + 1]["requests_over_time"][i] +# new_value = linearly_interpolate_value(rps, rps_values[lower_rps_index], lower_request, rps_values[lower_rps_index + 1], upper_request) +# interpolated_requests_at_new_rps.append(new_value) +# interpolated_request_values.append({ "requests_per_second": rps, "requests_over_time": interpolated_requests_at_new_rps }) +# elif rps > rps_values[lower_rps_index + 1]: +# while rps > rps_values[lower_rps_index + 1]: +# lower_rps_index += 1 +# interpolated_requests_at_new_rps = [] +# for i in range(len(request_over_time_raw[lower_rps_index]["requests_over_time"])): +# lower_request = request_over_time_raw[lower_rps_index]["requests_over_time"][i] +# upper_request = request_over_time_raw[lower_rps_index + 1]["requests_over_time"][i] +# new_value = linearly_interpolate_value(rps, rps_values[lower_rps_index], lower_request, rps_values[lower_rps_index + 1], upper_request) +# interpolated_requests_at_new_rps.append(new_value) +# interpolated_request_values.append({ "requests_per_second": rps, "requests_over_time": interpolated_requests_at_new_rps }) +# interpolated_request_over_time_results = [] +# for request_value in interpolated_request_values: +# request_distribution = Distribution(data=request_value["requests_over_time"]) +# result = generate_metric_report(request_distribution, "requestsOverTime") +# result["requestsPerSecond"] = request_value["requests_per_second"] +# interpolated_request_over_time_results.append(result) +# return { "rawData": request_over_time_results, "interpolatedData": interpolated_request_over_time_results } + + +def generate_workload_details(report: TextGenerationBenchmarkReport, benchmarks: List[TextGenerationBenchmark]) -> Dict[str, Any]: + all_prompt_token_data = [data for benchmark in benchmarks for data in benchmark.prompt_token_distribution.data] all_prompt_token_distribution = Distribution(data=all_prompt_token_data) - all_output_token_data = [data for benchmark in report.benchmarks for data in 
benchmark.output_token_distribution.data] + all_output_token_data = [data for benchmark in benchmarks for data in benchmark.output_token_distribution.data] all_output_token_distribution = Distribution(data=all_output_token_data) prompt_token_data = generate_metric_report(all_prompt_token_distribution, "tokenDistributions") output_token_data = generate_metric_report(all_output_token_distribution, "tokenDistributions") - prompt_token_samples = [result.prompt for benchmark in report.benchmarks for result in benchmark.results] - output_token_samples = [result.output for benchmark in report.benchmarks for result in benchmark.results] + prompt_token_samples = [result.prompt for benchmark in benchmarks for result in benchmark.results] + output_token_samples = [result.output for benchmark in benchmarks for result in benchmark.results] num_samples = min(5, len(prompt_token_samples), len(output_token_samples)) sample_indices = random.sample(range(len(prompt_token_samples)), num_samples) sample_prompts = [prompt_token_samples[i] for i in sample_indices] + """ + Need a wholistic approach to parsing out characters in the prompt that don't covert well into the format we need + """ sample_prompts = list(map(lambda prompt: prompt.replace("\n", " ").replace("\"", "'"), sample_prompts)) sample_outputs = [output_token_samples[i] for i in sample_indices] sample_outputs = list(map(lambda output: output.replace("\n", " ").replace("\"", "'"), sample_outputs)) - request_over_time_results = generate_request_over_time_data(report.benchmarks) + request_over_time_results = generate_request_over_time_data(benchmarks) return { "prompts": { @@ -184,35 +203,92 @@ def generate_workload_details(report: TextGenerationBenchmarkReport) -> Dict[str def generate_benchmark_json(bm: TextGenerationBenchmark) -> Dict[str, Any]: ttft_dist_ms = Distribution(data=[val * 1000 for val in bm.ttft_distribution.data]) ttft_data = generate_metric_report(ttft_dist_ms, 'ttft') - tpot_dist_ms = Distribution(data=[val * 1000 for val in bm.itl_distribution.data]) - tpot_data = generate_metric_report(tpot_dist_ms, 'tpot') - throughput_dist_ms = Distribution(data=[val * 1000 for val in bm.output_token_throughput_distribution.data]) + itl_dist_ms = Distribution(data=[val * 1000 for val in bm.itl_distribution.data]) + itl_data = generate_metric_report(itl_dist_ms, 'tpot') + throughput_dist_ms = Distribution(data=bm.output_token_throughput_distribution.data) throughput_data = generate_metric_report(throughput_dist_ms, 'throughput') latency_dist_ms = Distribution(data=[val * 1000 for val in bm.request_latency_distribution.data]) - time_per_request_data = generate_metric_report(latency_dist_ms, 'timePerRequest') + latency__data = generate_metric_report(latency_dist_ms, 'timePerRequest') return { "requestsPerSecond": bm.completed_request_rate, + **itl_data, **ttft_data, - **tpot_data, **throughput_data, - **time_per_request_data, + **latency__data, } +def generate_interpolated_benchmarks(benchmarks: List[TextGenerationBenchmark]): + """ + Should we only use constant rate benchmarks here since synchronous and throughput runs might not be appropriate to lump in for interoplation across all rps? 
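Editor's aside, not part of the patch series: the concern raised in this docstring about the usable RPS range can be checked directly. The interpolation targets are the integers from ceil(min rps) up to, but not including, ceil(max rps), so closely spaced request rates may yield no targets at all. In the sketch below, interpolation_targets is an illustrative name for what the patch computes inline as rps_range; the sample rates are the ones quoted in this docstring.

# Editor's illustrative sketch, not part of the patch series.
import math
from typing import List


def interpolation_targets(rps_values: List[float]) -> List[int]:
    # Mirrors rps_range in generate_interpolated_benchmarks:
    # integers from ceil(min) up to (but not including) ceil(max).
    return list(range(math.ceil(min(rps_values)), math.ceil(max(rps_values))))


print(interpolation_targets([1.1, 1.3, 1.5, 2.1, 2.5]))  # [2]  -> can interpolate at 2 rps
print(interpolation_targets([1.1, 1.4, 1.6]))            # []   -> nothing to interpolate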
+ + Other edge-case, what if rps doesn't span more than 1 whole rps even with multiple benchmarks + ex: 1.1, 1.3, 1.5, 2.1, 2.5, can interpolate at 2rps + or worse, 1.1, 1.4, 1.6, can't interpolate + """ + if len(benchmarks) == 1: + return [] + + sorted_benchmarks = sorted(benchmarks[:], key=lambda bm: bm.completed_request_rate) + rps_values = [bm.completed_request_rate for bm in sorted_benchmarks] + rps_range = list(range(math.ceil(min(rps_values)), math.ceil(max(rps_values)))) + + ttft_data_by_rps = list(map(lambda bm: (bm.completed_request_rate, bm.ttft_distribution.data), sorted_benchmarks)) + interpolated_ttft_data_by_rps = interpolate_data_points(ttft_data_by_rps, rps_range) + + itl_data_by_rps = list(map(lambda bm: (bm.completed_request_rate, bm.itl_distribution.data), sorted_benchmarks)) + interpolated_itl_data_by_rps = interpolate_data_points(itl_data_by_rps, rps_range) + + throughput_data_by_rps = list(map(lambda bm: (bm.completed_request_rate, bm.output_token_throughput_distribution.data), sorted_benchmarks)) + interpolated_throughput_data_by_rps = interpolate_data_points(throughput_data_by_rps, rps_range) + + latency_data_by_rps = list(map(lambda bm: (bm.completed_request_rate, bm.request_latency_distribution.data), sorted_benchmarks)) + interpolated_latency_data_by_rps = interpolate_data_points(latency_data_by_rps, rps_range) + + benchmark_json = [] + for i in range(len(interpolated_ttft_data_by_rps)): + rps, interpolated_ttft_data = interpolated_ttft_data_by_rps[i] + ttft_dist_ms = Distribution(data=[val * 1000 for val in interpolated_ttft_data]) + final_ttft_data = generate_metric_report(ttft_dist_ms, 'ttft') + + _, interpolated_itl_data = interpolated_itl_data_by_rps[i] + itl_dist_ms = Distribution(data=[val * 1000 for val in interpolated_itl_data]) + final_itl_data = generate_metric_report(itl_dist_ms, 'tpot') + + _, interpolated_throughput_data = interpolated_throughput_data_by_rps[i] + throughput_dist_ms = Distribution(data=interpolated_throughput_data) + final_throughput_data = generate_metric_report(throughput_dist_ms, 'throughput') + + _, interpolated_latency_data = interpolated_latency_data_by_rps[i] + latency_dist_ms = Distribution(data=[val * 1000 for val in interpolated_latency_data]) + final_latency_data = generate_metric_report(latency_dist_ms, 'timePerRequest') + + benchmark_json.append({ + "requestsPerSecond": rps, + **final_itl_data, + **final_ttft_data, + **final_throughput_data, + **final_latency_data, + }) + return benchmark_json + def generate_benchmarks_json(benchmarks: List[TextGenerationBenchmark]): - benchmark_report_json = [] + raw_benchmark_json = [] for benchmark in benchmarks: benchmarks_report = generate_benchmark_json(benchmark) - benchmark_report_json.append(benchmarks_report) - return benchmark_report_json + raw_benchmark_json.append(benchmarks_report) + interpolated_benchmark_json = generate_interpolated_benchmarks(benchmarks) + + return { "raw": raw_benchmark_json, "interpolated_by_rps": interpolated_benchmark_json } def generate_js_variable(variable_name: str, data: dict) -> str: json_data = json.dumps(data, indent=2) return f'`window.{variable_name} = {json_data};`' # Wrap in quotes def generate_ui_api_data(report: TextGenerationBenchmarkReport): - run_info_data = generate_run_info(report) - workload_details_data = generate_workload_details(report) - benchmarks_data = generate_benchmarks_json(report.benchmarks) + filtered_benchmarks = list(filter(lambda bm: bm.completed_request_rate > 0, report.benchmarks)) + run_info_data = 
generate_run_info(report, filtered_benchmarks) + workload_details_data = generate_workload_details(report, filtered_benchmarks) + benchmarks_data = generate_benchmarks_json(filtered_benchmarks) run_info_script = generate_js_variable("run_info", run_info_data) workload_details_script = generate_js_variable("workload_details", workload_details_data) benchmarks_script = generate_js_variable("benchmarks", benchmarks_data) diff --git a/src/guidellm/utils/interpolation.py b/src/guidellm/utils/interpolation.py new file mode 100644 index 00000000..ef19d8e4 --- /dev/null +++ b/src/guidellm/utils/interpolation.py @@ -0,0 +1,89 @@ +from typing import List, Tuple +import numpy as np + +def linear_interpolate(target: float, lower: Tuple[float, float], upper: Tuple[float, float]) -> float: + """ + Linearly interpolates a value at 'target' given two points. + If the target equals one of the bounds, the corresponding value is returned. + """ + lower_ref, lower_measurement = lower + upper_ref, upper_measurement = upper + + if upper_ref == lower_ref: + return lower_measurement + if target <= lower_ref: + return lower_measurement + if target >= upper_ref: + return upper_measurement + + t = (target - lower_ref) / (upper_ref - lower_ref) + return lower_measurement + t * (upper_measurement - lower_measurement) + +def stretch_list(arr: List[float], target_length: int): + if len(arr) == target_length: + return np.array(arr) + + original_x = np.linspace(0, 1, len(arr)) + target_x = np.linspace(0, 1, target_length) + stretched_arr = list(np.interp(target_x, original_x, arr)) + return stretched_arr + +def interpolate_measurements(target: float, lower_ref_measurements_pair: Tuple[float, List[float]], upper_ref_measurements_pair: Tuple[float, List[float]]) -> List[float]: + """ + Interpolates each corresponding measurement value between lower and upper benchmarks. + Assumes that lower_measurements and upper_measurements have the same length. + """ + lower_ref, lower_measurements = lower_ref_measurements_pair + upper_ref, upper_measurements = upper_ref_measurements_pair + + if len(lower_measurements) < len(upper_measurements): + lower_measurements = stretch_list(lower_measurements, len(upper_measurements)) + if len(lower_measurements) > len(upper_measurements): + upper_measurements = stretch_list(upper_measurements, len(lower_measurements)) + + return [ + linear_interpolate(target, (lower_ref, lower_measurements[i]), (upper_ref, upper_measurements[i])) + for i in range(len(lower_measurements)) + ] + +def interpolate_data_points(data_points: List[Tuple[float, List[float]]], + target_ref: List[float]) -> List[Tuple[float, List[float]]]: + """ + Given sorted data_points as tuples of (scalar, measurements) and a list of target scalar values, + interpolate the measurements for each target. + + The data_points must be sorted by the scalar value in ascending order. + Only target scalar values that fall within the min and max of the data_points are considered. + """ + if not data_points: + return [] + + lower_bound = data_points[0][0] + upper_bound = data_points[-1][0] + # Filter target_ref to only include values within the provided range. + valid_targets = [t for t in target_ref if lower_bound <= t <= upper_bound] + + interpolated_results = [] + # Pointer to the current lower data point index. + lower_idx = 0 + + for target in sorted(valid_targets): + # Advance the lower_idx until we find the correct interval. 
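Editor's aside, not part of the patch series: the resampling-plus-pairwise-interpolation above is the core of the new interpolation.py. The standalone sketch below restates stretch_list and linear_interpolate so their behavior can be checked in isolation; the names and logic are copied from the patch, and only the small demo at the bottom is new, with expected values taken from tests/unit/utils/test_interpolation.py added later in this patch.

# Editor's illustrative sketch, not part of the patch series.
from typing import List, Tuple

import numpy as np


def stretch_list(arr: List[float], target_length: int) -> List[float]:
    # Resample arr onto target_length evenly spaced points via linear interpolation.
    if len(arr) == target_length:
        return list(arr)
    original_x = np.linspace(0, 1, len(arr))
    target_x = np.linspace(0, 1, target_length)
    return list(np.interp(target_x, original_x, arr))


def linear_interpolate(target: float, lower: Tuple[float, float], upper: Tuple[float, float]) -> float:
    # Clamp outside [lower_ref, upper_ref]; interpolate linearly inside it.
    lower_ref, lower_val = lower
    upper_ref, upper_val = upper
    if upper_ref == lower_ref or target <= lower_ref:
        return lower_val
    if target >= upper_ref:
        return upper_val
    fraction = (target - lower_ref) / (upper_ref - lower_ref)
    return lower_val + fraction * (upper_val - lower_val)


if __name__ == "__main__":
    print(stretch_list([1, 3, 5], 5))             # [1.0, 2.0, 3.0, 4.0, 5.0]
    print(linear_interpolate(2, (1, 4), (3, 6)))  # 5.0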
+ while (lower_idx < len(data_points) - 1 and target > data_points[lower_idx + 1][0]): + lower_idx += 1 + + # If the target exactly matches a known scalar value, use its measurements. + if target == data_points[lower_idx][0]: + interpolated_results.append((target, data_points[lower_idx][1][:])) + # Otherwise, if target lies between two data points, interpolate. + elif lower_idx < len(data_points) - 1: + lower_ref, lower_measurements = data_points[lower_idx] + upper_ref, upper_measurements = data_points[lower_idx + 1] + interpolated = interpolate_measurements(target, (lower_ref, lower_measurements), + (upper_ref, upper_measurements)) + interpolated_results.append((target, interpolated)) + else: + # If for some reason target is above the highest known data point, ignore it. + continue + + return interpolated_results \ No newline at end of file diff --git a/tests/unit/utils/test_interpolation.py b/tests/unit/utils/test_interpolation.py new file mode 100644 index 00000000..a79504b5 --- /dev/null +++ b/tests/unit/utils/test_interpolation.py @@ -0,0 +1,26 @@ +import pytest +from guidellm.utils.interpolation import ( + linear_interpolate, + interpolate_measurements, + interpolate_data_points, + stretch_list +) + +@pytest.mark.smoke() +def test_linear_interpolate(): + assert linear_interpolate(2, (1, 4), (3, 6)) == 5 + + +@pytest.mark.smoke() +def test_stretch_list(): + assert stretch_list([1,3,5], 5) == [1,2,3,4,5] + +@pytest.mark.smoke() +def test_interpolate_measurements(): + assert interpolate_measurements(2, (1, [1,2,3,4,5]), (3, [2,3,4,5,6])) == [1.5,2.5,3.5,4.5,5.5] + assert interpolate_measurements(2, (1, [1,2,3,4,5]), (3, [5,4,3,2,1])) == [3,3,3,3,3] + + +@pytest.mark.smoke() +def test_interpolate_data_point(): + assert interpolate_data_points([(1, [1,2,3,4,5]), (3, [2,3,4,5,6]), (9, [5,6,7,8,9])], [1,2,3,4,5,6,7,8,9]) == [(1, [1,2,3,4,5]), (2, [1.5,2.5,3.5,4.5,5.5]), (3, [2,3,4,5,6]), (4, [2.5, 3.5, 4.5, 5.5, 6.5]), (5, [3, 4, 5, 6, 7]), (6, [3.5, 4.5, 5.5, 6.5, 7.5]), (7, [4, 5, 6, 7, 8]), (8, [4.5, 5.5, 6.5, 7.5, 8.5]), (9, [5, 6, 7, 8, 9])] \ No newline at end of file From 18335186431ab7c03690dd475a4b993bb6a9578c Mon Sep 17 00:00:00 2001 From: dalthecow Date: Wed, 19 Mar 2025 16:15:20 -0400 Subject: [PATCH 6/9] remove commented code --- src/guidellm/utils/generate_ui_data.py | 62 -------------------------- 1 file changed, 62 deletions(-) diff --git a/src/guidellm/utils/generate_ui_data.py b/src/guidellm/utils/generate_ui_data.py index a450fd94..1ccaf3de 100644 --- a/src/guidellm/utils/generate_ui_data.py +++ b/src/guidellm/utils/generate_ui_data.py @@ -96,68 +96,6 @@ def generate_request_over_time_data(benchmarks: List[TextGenerationBenchmark]) - final_result = generate_metric_report(request_distribution, "requestsOverTime") return { "numBenchmarks": len(sorted_benchmarks), **final_result } -# def generate_request_over_time_data_per_benchmark(benchmarks: List[TextGenerationBenchmark]) -> List[Dict[str, Any]]: - -# request_over_time_results = [] -# for benchmark in benchmarks: -# # compare benchmark start time to text generation result end time -# all_result_end_times = [result.end_time for result in benchmark.results if result.end_time is not None] -# request_over_time_values = list(map(lambda time: time - benchmark.start_time, all_result_end_times)) -# request_distribution = Distribution(data=request_over_time_values) -# result = generate_metric_report(request_distribution, "requestsOverTime") -# result["requestsPerSecond"] = benchmark.completed_request_rate -# 
request_over_time_results.append(result) - -# if len(benchmarks) == 1: -# return request_over_time_results - -# request_over_time_raw = [] -# sorted_bm = sorted(benchmarks, key=lambda bm: bm.completed_request_rate) -# for benchmark in sorted_bm: -# # compare benchmark start time to text generation result end time -# all_result_end_times = [result.end_time for result in benchmark.results if result.end_time is not None] -# request_over_time_values = list(map(lambda time: time - benchmark.start_time, all_result_end_times)) -# request_at_rps = { "rps": benchmark.completed_request_rate, "requests_over_time": request_over_time_values } -# request_over_time_raw.append(request_at_rps) - -# rps_values = [request_obj["rps"] for request_obj in request_over_time_raw] -# rps_range = list(range(math.ceil(min(rps_values)), math.ceil(max(rps_values)))) -# interpolated_request_values = [] -# lower_rps_index = 0 -# for rps in rps_range: -# if rps > rps_values[lower_rps_index + 1]: lower_rps_index += 1 -# if rps == rps_values[lower_rps_index]: -# interpolated_request_values.append({ -# "requests_per_second": rps, -# "requests_over_time": request_over_time_raw[lower_rps_index]["requests_over_time"][:] -# }) -# lower_rps_index += 1 -# elif rps < rps_values[lower_rps_index + 1]: -# interpolated_requests_at_new_rps = [] -# for i in range(len(request_over_time_raw[lower_rps_index]["requests_over_time"])): -# lower_request = request_over_time_raw[lower_rps_index]["requests_over_time"][i] -# upper_request = request_over_time_raw[lower_rps_index + 1]["requests_over_time"][i] -# new_value = linearly_interpolate_value(rps, rps_values[lower_rps_index], lower_request, rps_values[lower_rps_index + 1], upper_request) -# interpolated_requests_at_new_rps.append(new_value) -# interpolated_request_values.append({ "requests_per_second": rps, "requests_over_time": interpolated_requests_at_new_rps }) -# elif rps > rps_values[lower_rps_index + 1]: -# while rps > rps_values[lower_rps_index + 1]: -# lower_rps_index += 1 -# interpolated_requests_at_new_rps = [] -# for i in range(len(request_over_time_raw[lower_rps_index]["requests_over_time"])): -# lower_request = request_over_time_raw[lower_rps_index]["requests_over_time"][i] -# upper_request = request_over_time_raw[lower_rps_index + 1]["requests_over_time"][i] -# new_value = linearly_interpolate_value(rps, rps_values[lower_rps_index], lower_request, rps_values[lower_rps_index + 1], upper_request) -# interpolated_requests_at_new_rps.append(new_value) -# interpolated_request_values.append({ "requests_per_second": rps, "requests_over_time": interpolated_requests_at_new_rps }) -# interpolated_request_over_time_results = [] -# for request_value in interpolated_request_values: -# request_distribution = Distribution(data=request_value["requests_over_time"]) -# result = generate_metric_report(request_distribution, "requestsOverTime") -# result["requestsPerSecond"] = request_value["requests_per_second"] -# interpolated_request_over_time_results.append(result) -# return { "rawData": request_over_time_results, "interpolatedData": interpolated_request_over_time_results } - def generate_workload_details(report: TextGenerationBenchmarkReport, benchmarks: List[TextGenerationBenchmark]) -> Dict[str, Any]: all_prompt_token_data = [data for benchmark in benchmarks for data in benchmark.prompt_token_distribution.data] From 8c6325577acccd5c9dc0931e0e8a3e3ed48e12fb Mon Sep 17 00:00:00 2001 From: dalthecow Date: Thu, 20 Mar 2025 19:56:50 -0400 Subject: [PATCH 7/9] update json property name from 
snake_case to camelCase --- src/guidellm/utils/generate_ui_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/guidellm/utils/generate_ui_data.py b/src/guidellm/utils/generate_ui_data.py index 1ccaf3de..1c8fd775 100644 --- a/src/guidellm/utils/generate_ui_data.py +++ b/src/guidellm/utils/generate_ui_data.py @@ -216,7 +216,7 @@ def generate_benchmarks_json(benchmarks: List[TextGenerationBenchmark]): raw_benchmark_json.append(benchmarks_report) interpolated_benchmark_json = generate_interpolated_benchmarks(benchmarks) - return { "raw": raw_benchmark_json, "interpolated_by_rps": interpolated_benchmark_json } + return { "raw": raw_benchmark_json, "interpolatedByRps": interpolated_benchmark_json } def generate_js_variable(variable_name: str, data: dict) -> str: json_data = json.dumps(data, indent=2) From 9691ef31e48b0b5d0197a52344222c9fdf9fdd9e Mon Sep 17 00:00:00 2001 From: dalthecow Date: Thu, 27 Mar 2025 14:10:03 -0400 Subject: [PATCH 8/9] include mode --- src/guidellm/utils/generate_ui_data.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/guidellm/utils/generate_ui_data.py b/src/guidellm/utils/generate_ui_data.py index 1c8fd775..507d4910 100644 --- a/src/guidellm/utils/generate_ui_data.py +++ b/src/guidellm/utils/generate_ui_data.py @@ -133,6 +133,7 @@ def generate_workload_details(report: TextGenerationBenchmarkReport, benchmarks: **output_token_data }, "requestsOverTime": request_over_time_results, + "rateType": report.args["mode"], "server": { "target": report.args.get('target', 'N/A') } @@ -223,7 +224,7 @@ def generate_js_variable(variable_name: str, data: dict) -> str: return f'`window.{variable_name} = {json_data};`' # Wrap in quotes def generate_ui_api_data(report: TextGenerationBenchmarkReport): - filtered_benchmarks = list(filter(lambda bm: bm.completed_request_rate > 0, report.benchmarks)) + filtered_benchmarks = list(filter(lambda bm: (bm.completed_request_rate > 0) and bm.mode != 'throughput', report.benchmarks)) run_info_data = generate_run_info(report, filtered_benchmarks) workload_details_data = generate_workload_details(report, filtered_benchmarks) benchmarks_data = generate_benchmarks_json(filtered_benchmarks) From 656e6dd13e903eda58a3d1ca4d9bc020f9887f90 Mon Sep 17 00:00:00 2001 From: dalthecow Date: Thu, 3 Apr 2025 18:37:47 -0400 Subject: [PATCH 9/9] remove backen benchmark interoplation, hook up data generation to injector and output html report --- src/guidellm/config.py | 10 ++- src/guidellm/main.py | 4 +- src/guidellm/utils/__init__.py | 4 -- src/guidellm/utils/generate_ui_data.py | 80 ++++------------------- src/guidellm/utils/injector.py | 41 ++++-------- src/guidellm/utils/interpolation.py | 89 -------------------------- tests/unit/utils/test_interpolation.py | 26 -------- 7 files changed, 33 insertions(+), 221 deletions(-) delete mode 100644 src/guidellm/utils/interpolation.py delete mode 100644 tests/unit/utils/test_interpolation.py diff --git a/src/guidellm/config.py b/src/guidellm/config.py index 2d4e102a..52dcaf3e 100644 --- a/src/guidellm/config.py +++ b/src/guidellm/config.py @@ -32,9 +32,9 @@ class Environment(str, Enum): ENV_REPORT_MAPPING = { Environment.PROD: "https://guidellm.neuralmagic.com/local-report/index.html", - Environment.STAGING: "https://staging.guidellm.neuralmagic.com/local-report/index.html", - Environment.DEV: "https://dev.guidellm.neuralmagic.com/local-report/index.html", - Environment.LOCAL: "tests/dummy/report.html", + Environment.STAGING: 
"https://review.neuralmagic.com/guidellm-ui/staging/index.html", + Environment.DEV: "https://review.neuralmagic.com/guidellm-ui/dev/index.html", + Environment.LOCAL: "http://localhost:3000/index.html", } @@ -112,8 +112,6 @@ class ReportGenerationSettings(BaseModel): """ source: str = "" - report_html_match: str = "window.report_data = {};" - report_html_placeholder: str = "{}" class Settings(BaseSettings): @@ -138,7 +136,7 @@ class Settings(BaseSettings): ) # general settings - env: Environment = Environment.PROD + env: Environment = Environment.DEV request_timeout: int = 60 * 5 # 5 minutes request_http2: bool = True max_concurrency: int = 512 diff --git a/src/guidellm/main.py b/src/guidellm/main.py index ad29beeb..5bd79b04 100644 --- a/src/guidellm/main.py +++ b/src/guidellm/main.py @@ -2,6 +2,7 @@ from typing import Any, Literal, Mapping, Optional, Union, get_args import click +from guidellm.utils.injector import create_report from loguru import logger from transformers import AutoTokenizer # type: ignore[import-untyped] @@ -290,7 +291,8 @@ def generate_benchmark_report( ) report = asyncio.run(_run_executor_for_result(executor)) - generate_ui_api_data(report) + js_data = generate_ui_api_data(report) + create_report(js_data, 'guidellm_report') # Save and print report guidance_report = GuidanceReport() diff --git a/src/guidellm/utils/__init__.py b/src/guidellm/utils/__init__.py index 81cf580f..8ca923b7 100644 --- a/src/guidellm/utils/__init__.py +++ b/src/guidellm/utils/__init__.py @@ -1,5 +1,4 @@ from .generate_ui_data import generate_ui_api_data -from .interpolation import linear_interpolate, interpolate_measurements, interpolate_data_points, stretch_list from .injector import create_report, inject_data from .progress import BenchmarkReportProgress from .text import ( @@ -28,12 +27,9 @@ "filter_text", "generate_ui_api_data", "inject_data", - "interpolate_data_points", - "interpolate_measurements", "is_path", "is_path_like", "is_url", - "linear_interpolate", "load_text", "load_text_lines", "load_transformers_dataset", diff --git a/src/guidellm/utils/generate_ui_data.py b/src/guidellm/utils/generate_ui_data.py index 507d4910..6181c0c3 100644 --- a/src/guidellm/utils/generate_ui_data.py +++ b/src/guidellm/utils/generate_ui_data.py @@ -5,7 +5,6 @@ from typing import Any, Dict, List from guidellm.core.distribution import Distribution from guidellm.core import TextGenerationBenchmarkReport, TextGenerationBenchmark -from guidellm.utils.interpolation import interpolate_data_points def generate_metric_report(dist: Distribution, metric_label: str, n_buckets: int = 18): total = dist.__len__() @@ -76,10 +75,6 @@ def generate_run_info(report: TextGenerationBenchmarkReport, benchmarks: List[Te "timestamp": timestamp } -def linearly_interpolate_value(target_input, lower_input, lower_output, upperInput, upper_output): - fraction = (target_input - lower_input) / (upperInput - lower_input) - return lower_output + fraction * (upper_output - lower_output) - def generate_request_over_time_data(benchmarks: List[TextGenerationBenchmark]) -> List[Dict[str, Any]]: filtered_benchmarks = filter(lambda bm: bm.start_time is not None, benchmarks) sorted_benchmarks = list(sorted(filtered_benchmarks, key=lambda bm: bm.start_time)) @@ -106,7 +101,7 @@ def generate_workload_details(report: TextGenerationBenchmarkReport, benchmarks: prompt_token_data = generate_metric_report(all_prompt_token_distribution, "tokenDistributions") output_token_data = generate_metric_report(all_output_token_distribution, "tokenDistributions") 
- prompt_token_samples = [result.prompt for benchmark in benchmarks for result in benchmark.results] + prompt_token_samples = [result.request.prompt for benchmark in benchmarks for result in benchmark.results] output_token_samples = [result.output for benchmark in benchmarks for result in benchmark.results] num_samples = min(5, len(prompt_token_samples), len(output_token_samples)) @@ -140,9 +135,9 @@ def generate_workload_details(report: TextGenerationBenchmarkReport, benchmarks: } def generate_benchmark_json(bm: TextGenerationBenchmark) -> Dict[str, Any]: - ttft_dist_ms = Distribution(data=[val * 1000 for val in bm.ttft_distribution.data]) + ttft_dist_ms = Distribution(data=bm.ttft_distribution.data) ttft_data = generate_metric_report(ttft_dist_ms, 'ttft') - itl_dist_ms = Distribution(data=[val * 1000 for val in bm.itl_distribution.data]) + itl_dist_ms = Distribution(data=bm.itl_distribution.data) itl_data = generate_metric_report(itl_dist_ms, 'tpot') throughput_dist_ms = Distribution(data=bm.output_token_throughput_distribution.data) throughput_data = generate_metric_report(throughput_dist_ms, 'throughput') @@ -156,72 +151,17 @@ def generate_benchmark_json(bm: TextGenerationBenchmark) -> Dict[str, Any]: **latency__data, } -def generate_interpolated_benchmarks(benchmarks: List[TextGenerationBenchmark]): - """ - Should we only use constant rate benchmarks here since synchronous and throughput runs might not be appropriate to lump in for interoplation across all rps? - - Other edge-case, what if rps doesn't span more than 1 whole rps even with multiple benchmarks - ex: 1.1, 1.3, 1.5, 2.1, 2.5, can interpolate at 2rps - or worse, 1.1, 1.4, 1.6, can't interpolate - """ - if len(benchmarks) == 1: - return [] - - sorted_benchmarks = sorted(benchmarks[:], key=lambda bm: bm.completed_request_rate) - rps_values = [bm.completed_request_rate for bm in sorted_benchmarks] - rps_range = list(range(math.ceil(min(rps_values)), math.ceil(max(rps_values)))) - - ttft_data_by_rps = list(map(lambda bm: (bm.completed_request_rate, bm.ttft_distribution.data), sorted_benchmarks)) - interpolated_ttft_data_by_rps = interpolate_data_points(ttft_data_by_rps, rps_range) - - itl_data_by_rps = list(map(lambda bm: (bm.completed_request_rate, bm.itl_distribution.data), sorted_benchmarks)) - interpolated_itl_data_by_rps = interpolate_data_points(itl_data_by_rps, rps_range) - - throughput_data_by_rps = list(map(lambda bm: (bm.completed_request_rate, bm.output_token_throughput_distribution.data), sorted_benchmarks)) - interpolated_throughput_data_by_rps = interpolate_data_points(throughput_data_by_rps, rps_range) - - latency_data_by_rps = list(map(lambda bm: (bm.completed_request_rate, bm.request_latency_distribution.data), sorted_benchmarks)) - interpolated_latency_data_by_rps = interpolate_data_points(latency_data_by_rps, rps_range) - - benchmark_json = [] - for i in range(len(interpolated_ttft_data_by_rps)): - rps, interpolated_ttft_data = interpolated_ttft_data_by_rps[i] - ttft_dist_ms = Distribution(data=[val * 1000 for val in interpolated_ttft_data]) - final_ttft_data = generate_metric_report(ttft_dist_ms, 'ttft') - - _, interpolated_itl_data = interpolated_itl_data_by_rps[i] - itl_dist_ms = Distribution(data=[val * 1000 for val in interpolated_itl_data]) - final_itl_data = generate_metric_report(itl_dist_ms, 'tpot') - - _, interpolated_throughput_data = interpolated_throughput_data_by_rps[i] - throughput_dist_ms = Distribution(data=interpolated_throughput_data) - final_throughput_data = 
generate_metric_report(throughput_dist_ms, 'throughput') - - _, interpolated_latency_data = interpolated_latency_data_by_rps[i] - latency_dist_ms = Distribution(data=[val * 1000 for val in interpolated_latency_data]) - final_latency_data = generate_metric_report(latency_dist_ms, 'timePerRequest') - - benchmark_json.append({ - "requestsPerSecond": rps, - **final_itl_data, - **final_ttft_data, - **final_throughput_data, - **final_latency_data, - }) - return benchmark_json - def generate_benchmarks_json(benchmarks: List[TextGenerationBenchmark]): - raw_benchmark_json = [] + benchmark_json = [] for benchmark in benchmarks: benchmarks_report = generate_benchmark_json(benchmark) - raw_benchmark_json.append(benchmarks_report) - interpolated_benchmark_json = generate_interpolated_benchmarks(benchmarks) + benchmark_json.append(benchmarks_report) - return { "raw": raw_benchmark_json, "interpolatedByRps": interpolated_benchmark_json } + return { "benchmarks": benchmark_json } def generate_js_variable(variable_name: str, data: dict) -> str: json_data = json.dumps(data, indent=2) - return f'`window.{variable_name} = {json_data};`' # Wrap in quotes + return f'window.{variable_name} = {json_data};' def generate_ui_api_data(report: TextGenerationBenchmarkReport): filtered_benchmarks = list(filter(lambda bm: (bm.completed_request_rate > 0) and bm.mode != 'throughput', report.benchmarks)) @@ -241,4 +181,8 @@ def generate_ui_api_data(report: TextGenerationBenchmarkReport): with open("ben_test/benchmarks.js", "w") as f: f.write(benchmarks_script) - print("Reports saved to run_info.json, workload_details.json, benchmarks.json") \ No newline at end of file + return { + "window.run_info = {};": run_info_script, + "window.workload_details = {};": workload_details_script, + "window.benchmarks = {};": benchmarks_script, + } \ No newline at end of file diff --git a/src/guidellm/utils/injector.py b/src/guidellm/utils/injector.py index fb5216aa..21e20901 100644 --- a/src/guidellm/utils/injector.py +++ b/src/guidellm/utils/injector.py @@ -1,20 +1,18 @@ from pathlib import Path from typing import Union -from pydantic import BaseModel - from guidellm.config import settings from guidellm.utils.text import load_text __all__ = ["create_report", "inject_data"] -def create_report(model: BaseModel, output_path: Union[str, Path]) -> Path: +def create_report(js_data: dict, output_path: Union[str, Path]) -> Path: """ - Creates a report from the model and saves it to the output path. + Creates a report from the dictionary and saves it to the output path. - :param model: the model to serialize and inject - :type model: BaseModel + :param js_data: dict with match str and json data to inject + :type js_data: dict :param output_path: the path, either a file or a directory, to save the report to. If a directory, the report will be saved as "report.html" inside of the directory. 
@@ -27,10 +25,8 @@ def create_report(model: BaseModel, output_path: Union[str, Path]) -> Path: html_content = load_text(settings.report_generation.source) report_content = inject_data( - model, + js_data, html_content, - settings.report_generation.report_html_match, - settings.report_generation.report_html_placeholder, ) if not output_path.suffix: @@ -39,32 +35,23 @@ def create_report(model: BaseModel, output_path: Union[str, Path]) -> Path: output_path.parent.mkdir(parents=True, exist_ok=True) output_path.write_text(report_content) - + print(f'Report saved to {output_path}') return output_path - def inject_data( - model: BaseModel, + js_data: dict, html: str, - match: str, - placeholder: str, ) -> str: """ - Injects the data from the model into the HTML while replacing the placeholder. + Injects the json data into the HTML while replacing the placeholder. - :param model: the model to serialize and inject - :type model: BaseModel + :param js_data: the json data to inject + :type js_data: dict :param html: the html to inject the data into :type html: str - :param match: the string to match in the html to find the placeholder - :type match: str - :param placeholder: the placeholder to replace with the model data - inside of the placeholder - :type placeholder: str - :return: the html with the model data injected + :return: the html with the json data injected :rtype: str """ - model_str = model.json() - inject_str = match.replace(placeholder, model_str) - - return html.replace(match, inject_str) + for placeholder, script in js_data.items(): + html = html.replace(placeholder, script) + return html \ No newline at end of file diff --git a/src/guidellm/utils/interpolation.py b/src/guidellm/utils/interpolation.py deleted file mode 100644 index ef19d8e4..00000000 --- a/src/guidellm/utils/interpolation.py +++ /dev/null @@ -1,89 +0,0 @@ -from typing import List, Tuple -import numpy as np - -def linear_interpolate(target: float, lower: Tuple[float, float], upper: Tuple[float, float]) -> float: - """ - Linearly interpolates a value at 'target' given two points. - If the target equals one of the bounds, the corresponding value is returned. - """ - lower_ref, lower_measurement = lower - upper_ref, upper_measurement = upper - - if upper_ref == lower_ref: - return lower_measurement - if target <= lower_ref: - return lower_measurement - if target >= upper_ref: - return upper_measurement - - t = (target - lower_ref) / (upper_ref - lower_ref) - return lower_measurement + t * (upper_measurement - lower_measurement) - -def stretch_list(arr: List[float], target_length: int): - if len(arr) == target_length: - return np.array(arr) - - original_x = np.linspace(0, 1, len(arr)) - target_x = np.linspace(0, 1, target_length) - stretched_arr = list(np.interp(target_x, original_x, arr)) - return stretched_arr - -def interpolate_measurements(target: float, lower_ref_measurements_pair: Tuple[float, List[float]], upper_ref_measurements_pair: Tuple[float, List[float]]) -> List[float]: - """ - Interpolates each corresponding measurement value between lower and upper benchmarks. - Assumes that lower_measurements and upper_measurements have the same length. 
- """ - lower_ref, lower_measurements = lower_ref_measurements_pair - upper_ref, upper_measurements = upper_ref_measurements_pair - - if len(lower_measurements) < len(upper_measurements): - lower_measurements = stretch_list(lower_measurements, len(upper_measurements)) - if len(lower_measurements) > len(upper_measurements): - upper_measurements = stretch_list(upper_measurements, len(lower_measurements)) - - return [ - linear_interpolate(target, (lower_ref, lower_measurements[i]), (upper_ref, upper_measurements[i])) - for i in range(len(lower_measurements)) - ] - -def interpolate_data_points(data_points: List[Tuple[float, List[float]]], - target_ref: List[float]) -> List[Tuple[float, List[float]]]: - """ - Given sorted data_points as tuples of (scalar, measurements) and a list of target scalar values, - interpolate the measurements for each target. - - The data_points must be sorted by the scalar value in ascending order. - Only target scalar values that fall within the min and max of the data_points are considered. - """ - if not data_points: - return [] - - lower_bound = data_points[0][0] - upper_bound = data_points[-1][0] - # Filter target_ref to only include values within the provided range. - valid_targets = [t for t in target_ref if lower_bound <= t <= upper_bound] - - interpolated_results = [] - # Pointer to the current lower data point index. - lower_idx = 0 - - for target in sorted(valid_targets): - # Advance the lower_idx until we find the correct interval. - while (lower_idx < len(data_points) - 1 and target > data_points[lower_idx + 1][0]): - lower_idx += 1 - - # If the target exactly matches a known scalar value, use its measurements. - if target == data_points[lower_idx][0]: - interpolated_results.append((target, data_points[lower_idx][1][:])) - # Otherwise, if target lies between two data points, interpolate. - elif lower_idx < len(data_points) - 1: - lower_ref, lower_measurements = data_points[lower_idx] - upper_ref, upper_measurements = data_points[lower_idx + 1] - interpolated = interpolate_measurements(target, (lower_ref, lower_measurements), - (upper_ref, upper_measurements)) - interpolated_results.append((target, interpolated)) - else: - # If for some reason target is above the highest known data point, ignore it. 
- continue - - return interpolated_results \ No newline at end of file diff --git a/tests/unit/utils/test_interpolation.py b/tests/unit/utils/test_interpolation.py deleted file mode 100644 index a79504b5..00000000 --- a/tests/unit/utils/test_interpolation.py +++ /dev/null @@ -1,26 +0,0 @@ -import pytest -from guidellm.utils.interpolation import ( - linear_interpolate, - interpolate_measurements, - interpolate_data_points, - stretch_list -) - -@pytest.mark.smoke() -def test_linear_interpolate(): - assert linear_interpolate(2, (1, 4), (3, 6)) == 5 - - -@pytest.mark.smoke() -def test_stretch_list(): - assert stretch_list([1,3,5], 5) == [1,2,3,4,5] - -@pytest.mark.smoke() -def test_interpolate_measurements(): - assert interpolate_measurements(2, (1, [1,2,3,4,5]), (3, [2,3,4,5,6])) == [1.5,2.5,3.5,4.5,5.5] - assert interpolate_measurements(2, (1, [1,2,3,4,5]), (3, [5,4,3,2,1])) == [3,3,3,3,3] - - -@pytest.mark.smoke() -def test_interpolate_data_point(): - assert interpolate_data_points([(1, [1,2,3,4,5]), (3, [2,3,4,5,6]), (9, [5,6,7,8,9])], [1,2,3,4,5,6,7,8,9]) == [(1, [1,2,3,4,5]), (2, [1.5,2.5,3.5,4.5,5.5]), (3, [2,3,4,5,6]), (4, [2.5, 3.5, 4.5, 5.5, 6.5]), (5, [3, 4, 5, 6, 7]), (6, [3.5, 4.5, 5.5, 6.5, 7.5]), (7, [4, 5, 6, 7, 8]), (8, [4.5, 5.5, 6.5, 7.5, 8.5]), (9, [5, 6, 7, 8, 9])] \ No newline at end of file
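
Reviewer note (not part of the patch): the new report flow replaces pydantic-model injection with plain string substitution. generate_ui_api_data now returns a mapping of placeholder strings (e.g. "window.run_info = {};") to full window.* assignment scripts, and the simplified inject_data swaps each placeholder in the UI HTML for its script. The sketch below is a minimal, standalone approximation of that contract for local experimentation; the inline HTML template and the "run_info" payload are illustrative assumptions, not values taken from the real settings.report_generation.source.

import json

def generate_js_variable(variable_name: str, data: dict) -> str:
    # Emit a plain `window.<name> = {...};` script, matching the patched helper
    # (no backtick/quote wrapping around the statement).
    return f"window.{variable_name} = {json.dumps(data, indent=2)};"

def inject_data(js_data: dict, html: str) -> str:
    # Each key is a placeholder baked into the UI build; each value is the
    # script that replaces it. Mirrors the simplified injector in this patch.
    for placeholder, script in js_data.items():
        html = html.replace(placeholder, script)
    return html

if __name__ == "__main__":
    # Hypothetical stand-in for the built UI page; the real template is loaded
    # from settings.report_generation.source by create_report.
    html_template = "<html><body><script>window.run_info = {};</script></body></html>"
    js_data = {
        "window.run_info = {};": generate_js_variable("run_info", {"model": "demo-model"}),
    }
    print(inject_data(js_data, html_template))

Keying the dict by the literal placeholder string keeps inject_data free of the report_html_match / report_html_placeholder settings that this patch removes, at the cost of coupling the Python side to the exact placeholder text baked into the UI bundle.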