Skip to content

Commit fe35989

Browse files
committed
Add clean 4-run benchmark results with APC experiment
Fresh runs from same code, same server, with vLLM restart between sessions. All results include cost data and JMLC latency breakdown.

Session 1 (normal): vLLM first, SystemDS second
Session 2 (reverse): SystemDS first, vLLM second

Key findings:
- SystemDS matches vLLM on 4/5 workloads (byte-for-byte identical)
- Summarization: 1st-run always 25/50, 2nd-run always 31/50 (APC)
- Same-position runs are 100% text-identical across sessions
- JMLC overhead: <3% on generation workloads, ~29% on embeddings
1 parent 21456e9 commit fe35989

File tree

81 files changed

+4059
-118
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

81 files changed

+4059
-118
lines changed

scripts/staging/llm-bench/README.md

Lines changed: 83 additions & 118 deletions
Large diffs are not rendered by default.
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
{
2+
"git_commit_hash": "a692e00fa32d251dc7b1fba7a219f6820cfa319d",
3+
"timestamp_utc": "2026-03-05T04:33:40.663964+00:00",
4+
"python_version": "3.12.3 (main, Aug 14 2025, 17:47:21) [GCC 13.3.0]",
5+
"platform": {
6+
"os": "Linux",
7+
"architecture": "x86_64"
8+
},
9+
"backend": "systemds",
10+
"model": "Qwen/Qwen2.5-3B-Instruct",
11+
"workload_config_path": "/home/kubraaksu/systemds/scripts/staging/llm-bench/workloads/embeddings/config.yaml",
12+
"workload_config_sha256": "2d1ce87f23c894dd956b4354f78df96dcc271d192ed3d0d6f048eeb72b006c88",
13+
"gpu": {
14+
"gpu_count": 3,
15+
"gpus": [
16+
{
17+
"index": 0,
18+
"name": "NVIDIA H100 PCIe",
19+
"memory_total_mb": 81559.0,
20+
"memory_used_mb": 483.125,
21+
"memory_free_mb": 81075.875,
22+
"gpu_utilization_pct": 99,
23+
"memory_utilization_pct": 22
24+
},
25+
{
26+
"index": 1,
27+
"name": "NVIDIA H100 PCIe",
28+
"memory_total_mb": 81559.0,
29+
"memory_used_mb": 74730.6875,
30+
"memory_free_mb": 6828.3125,
31+
"gpu_utilization_pct": 0,
32+
"memory_utilization_pct": 0
33+
},
34+
{
35+
"index": 2,
36+
"name": "NVIDIA H100 PCIe",
37+
"memory_total_mb": 81559.0,
38+
"memory_used_mb": 483.125,
39+
"memory_free_mb": 81075.875,
40+
"gpu_utilization_pct": 0,
41+
"memory_utilization_pct": 0
42+
}
43+
]
44+
}
45+
}
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
{
2+
"n": 50.0,
3+
"latency_ms_mean": 60.18735467921942,
4+
"latency_ms_std": 6.098655589554143,
5+
"latency_ms_min": 55.26735467921942,
6+
"latency_ms_max": 97.26735467921942,
7+
"latency_ms_p50": 58.26735467921942,
8+
"latency_ms_p95": 63.26735467921942,
9+
"latency_ms_cv": 0.10132785569424262,
10+
"throughput_req_per_s": 15.879617740218775,
11+
"accuracy_mean": 0.9,
12+
"accuracy_count": "45/50",
13+
"pearson_r": 0.9172814516781994,
14+
"pearson_n": 50,
15+
"total_input_tokens": 3589,
16+
"total_output_tokens": 250,
17+
"total_tokens": 3839,
18+
"electricity_kwh": 0.00030612267818004406,
19+
"electricity_cost_usd": 9.183680345401322e-05,
20+
"hardware_amortization_usd": 0.0017492724467431092,
21+
"total_compute_cost_usd": 0.0018411092501971224,
22+
"memory_mb_initial": 139.0,
23+
"memory_mb_peak": 158.0,
24+
"memory_mb_avg": 155.28571428571428,
25+
"cpu_percent_avg": 3.1285714285714286,
26+
"gpu_info": {
27+
"gpu_count": 3,
28+
"gpus": [
29+
{
30+
"index": 0,
31+
"name": "NVIDIA H100 PCIe",
32+
"memory_total_mb": 81559.0,
33+
"memory_used_mb": 483.125,
34+
"memory_free_mb": 81075.875,
35+
"gpu_utilization_pct": 99,
36+
"memory_utilization_pct": 22
37+
},
38+
{
39+
"index": 1,
40+
"name": "NVIDIA H100 PCIe",
41+
"memory_total_mb": 81559.0,
42+
"memory_used_mb": 74730.6875,
43+
"memory_free_mb": 6828.3125,
44+
"gpu_utilization_pct": 0,
45+
"memory_utilization_pct": 0
46+
},
47+
{
48+
"index": 2,
49+
"name": "NVIDIA H100 PCIe",
50+
"memory_total_mb": 81559.0,
51+
"memory_used_mb": 483.125,
52+
"memory_free_mb": 81075.875,
53+
"gpu_utilization_pct": 0,
54+
"memory_utilization_pct": 0
55+
}
56+
]
57+
},
58+
"gpu_after": {
59+
"gpu_count": 3,
60+
"gpus": [
61+
{
62+
"index": 0,
63+
"name": "NVIDIA H100 PCIe",
64+
"memory_total_mb": 81559.0,
65+
"memory_used_mb": 483.125,
66+
"memory_free_mb": 81075.875,
67+
"gpu_utilization_pct": 99,
68+
"memory_utilization_pct": 22
69+
},
70+
{
71+
"index": 1,
72+
"name": "NVIDIA H100 PCIe",
73+
"memory_total_mb": 81559.0,
74+
"memory_used_mb": 74730.6875,
75+
"memory_free_mb": 6828.3125,
76+
"gpu_utilization_pct": 0,
77+
"memory_utilization_pct": 0
78+
},
79+
{
80+
"index": 2,
81+
"name": "NVIDIA H100 PCIe",
82+
"memory_total_mb": 81559.0,
83+
"memory_used_mb": 483.125,
84+
"memory_free_mb": 81075.875,
85+
"gpu_utilization_pct": 0,
86+
"memory_utilization_pct": 0
87+
}
88+
]
89+
}
90+
}
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
{
2+
"backend": "systemds",
3+
"backend_model": "Qwen/Qwen2.5-3B-Instruct",
4+
"workload": "embeddings",
5+
"concurrency": 1,
6+
"max_tokens": 16,
7+
"temperature": 0.0,
8+
"top_p": 0.9,
9+
"n_samples": 50,
10+
"timestamp": "2026-03-05T04:33:40.654560+00:00",
11+
"python_version": "3.12.3",
12+
"platform": "Linux-6.8.0-83-generic-x86_64-with-glibc2.39"
13+
}

scripts/staging/llm-bench/results/systemds_qwen3b_embeddings/samples.jsonl

Lines changed: 50 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
{
2+
"git_commit_hash": "a692e00fa32d251dc7b1fba7a219f6820cfa319d",
3+
"timestamp_utc": "2026-03-05T04:39:34.607433+00:00",
4+
"python_version": "3.12.3 (main, Aug 14 2025, 17:47:21) [GCC 13.3.0]",
5+
"platform": {
6+
"os": "Linux",
7+
"architecture": "x86_64"
8+
},
9+
"backend": "systemds",
10+
"model": "Qwen/Qwen2.5-3B-Instruct",
11+
"workload_config_path": "/home/kubraaksu/systemds/scripts/staging/llm-bench/workloads/embeddings/config.yaml",
12+
"workload_config_sha256": "2d1ce87f23c894dd956b4354f78df96dcc271d192ed3d0d6f048eeb72b006c88",
13+
"gpu": {
14+
"gpu_count": 3,
15+
"gpus": [
16+
{
17+
"index": 0,
18+
"name": "NVIDIA H100 PCIe",
19+
"memory_total_mb": 81559.0,
20+
"memory_used_mb": 483.125,
21+
"memory_free_mb": 81075.875,
22+
"gpu_utilization_pct": 99,
23+
"memory_utilization_pct": 22
24+
},
25+
{
26+
"index": 1,
27+
"name": "NVIDIA H100 PCIe",
28+
"memory_total_mb": 81559.0,
29+
"memory_used_mb": 74730.6875,
30+
"memory_free_mb": 6828.3125,
31+
"gpu_utilization_pct": 0,
32+
"memory_utilization_pct": 0
33+
},
34+
{
35+
"index": 2,
36+
"name": "NVIDIA H100 PCIe",
37+
"memory_total_mb": 81559.0,
38+
"memory_used_mb": 483.125,
39+
"memory_free_mb": 81075.875,
40+
"gpu_utilization_pct": 0,
41+
"memory_utilization_pct": 0
42+
}
43+
]
44+
}
45+
}
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
{
2+
"n": 50.0,
3+
"latency_ms_mean": 59.78951898403466,
4+
"latency_ms_std": 6.557560522023414,
5+
"latency_ms_min": 50.509518984034656,
6+
"latency_ms_max": 94.50951898403466,
7+
"latency_ms_p50": 60.509518984034656,
8+
"latency_ms_p95": 64.05951898403465,
9+
"latency_ms_cv": 0.10967742563331964,
10+
"throughput_req_per_s": 16.0204517240011,
11+
"accuracy_mean": 0.9,
12+
"accuracy_count": "45/50",
13+
"pearson_r": 0.9172814516781994,
14+
"pearson_n": 50,
15+
"total_input_tokens": 3589,
16+
"total_output_tokens": 250,
17+
"total_tokens": 3839,
18+
"electricity_kwh": 0.00030343158825093673,
19+
"electricity_cost_usd": 9.102947647528102e-05,
20+
"hardware_amortization_usd": 0.0017338947900053527,
21+
"total_compute_cost_usd": 0.0018249242664806337,
22+
"memory_mb_initial": 140.0,
23+
"memory_mb_peak": 159.0,
24+
"memory_mb_avg": 156.28571428571428,
25+
"cpu_percent_avg": 1.7142857142857142,
26+
"gpu_info": {
27+
"gpu_count": 3,
28+
"gpus": [
29+
{
30+
"index": 0,
31+
"name": "NVIDIA H100 PCIe",
32+
"memory_total_mb": 81559.0,
33+
"memory_used_mb": 483.125,
34+
"memory_free_mb": 81075.875,
35+
"gpu_utilization_pct": 99,
36+
"memory_utilization_pct": 22
37+
},
38+
{
39+
"index": 1,
40+
"name": "NVIDIA H100 PCIe",
41+
"memory_total_mb": 81559.0,
42+
"memory_used_mb": 74730.6875,
43+
"memory_free_mb": 6828.3125,
44+
"gpu_utilization_pct": 0,
45+
"memory_utilization_pct": 0
46+
},
47+
{
48+
"index": 2,
49+
"name": "NVIDIA H100 PCIe",
50+
"memory_total_mb": 81559.0,
51+
"memory_used_mb": 483.125,
52+
"memory_free_mb": 81075.875,
53+
"gpu_utilization_pct": 0,
54+
"memory_utilization_pct": 0
55+
}
56+
]
57+
},
58+
"gpu_after": {
59+
"gpu_count": 3,
60+
"gpus": [
61+
{
62+
"index": 0,
63+
"name": "NVIDIA H100 PCIe",
64+
"memory_total_mb": 81559.0,
65+
"memory_used_mb": 483.125,
66+
"memory_free_mb": 81075.875,
67+
"gpu_utilization_pct": 99,
68+
"memory_utilization_pct": 22
69+
},
70+
{
71+
"index": 1,
72+
"name": "NVIDIA H100 PCIe",
73+
"memory_total_mb": 81559.0,
74+
"memory_used_mb": 74730.6875,
75+
"memory_free_mb": 6828.3125,
76+
"gpu_utilization_pct": 0,
77+
"memory_utilization_pct": 0
78+
},
79+
{
80+
"index": 2,
81+
"name": "NVIDIA H100 PCIe",
82+
"memory_total_mb": 81559.0,
83+
"memory_used_mb": 483.125,
84+
"memory_free_mb": 81075.875,
85+
"gpu_utilization_pct": 0,
86+
"memory_utilization_pct": 0
87+
}
88+
]
89+
}
90+
}
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
{
2+
"backend": "systemds",
3+
"backend_model": "Qwen/Qwen2.5-3B-Instruct",
4+
"workload": "embeddings",
5+
"concurrency": 1,
6+
"max_tokens": 16,
7+
"temperature": 0.0,
8+
"top_p": 0.9,
9+
"n_samples": 50,
10+
"timestamp": "2026-03-05T04:39:34.597020+00:00",
11+
"python_version": "3.12.3",
12+
"platform": "Linux-6.8.0-83-generic-x86_64-with-glibc2.39"
13+
}

0 commit comments

Comments
 (0)