apache
diff --git a/‎scripts/staging/llm-bench/README.md‎
Lines changed: 83 additions & 118 deletions b/‎scripts/staging/llm-bench/README.md‎
Lines changed: 83 additions & 118 deletions
diff --git a/‎scripts/staging/llm-bench/results/systemds_qwen3b_embeddings/manifest.json‎
Lines changed: 45 additions & 0 deletions b/‎scripts/staging/llm-bench/results/systemds_qwen3b_embeddings/manifest.json‎
Lines changed: 45 additions & 0 deletions
diff --git a/‎scripts/staging/llm-bench/results/systemds_qwen3b_embeddings/metrics.json‎
Lines changed: 90 additions & 0 deletions b/‎scripts/staging/llm-bench/results/systemds_qwen3b_embeddings/metrics.json‎
Lines changed: 90 additions & 0 deletions
diff --git a/‎scripts/staging/llm-bench/results/systemds_qwen3b_embeddings/run_config.json‎
Lines changed: 13 additions & 0 deletions b/‎scripts/staging/llm-bench/results/systemds_qwen3b_embeddings/run_config.json‎
Lines changed: 13 additions & 0 deletions
diff --git a/‎scripts/staging/llm-bench/results/systemds_qwen3b_embeddings/samples.jsonl‎
Lines changed: 50 additions & 0 deletions b/‎scripts/staging/llm-bench/results/systemds_qwen3b_embeddings/samples.jsonl‎
Lines changed: 50 additions & 0 deletions
diff --git a/‎scripts/staging/llm-bench/results/systemds_qwen3b_embeddings_reverse/manifest.json‎
Lines changed: 45 additions & 0 deletions b/‎scripts/staging/llm-bench/results/systemds_qwen3b_embeddings_reverse/manifest.json‎
Lines changed: 45 additions & 0 deletions
diff --git a/‎scripts/staging/llm-bench/results/systemds_qwen3b_embeddings_reverse/metrics.json‎
Lines changed: 90 additions & 0 deletions b/‎scripts/staging/llm-bench/results/systemds_qwen3b_embeddings_reverse/metrics.json‎
Lines changed: 90 additions & 0 deletions
diff --git a/‎scripts/staging/llm-bench/results/systemds_qwen3b_embeddings_reverse/run_config.json‎
Lines changed: 13 additions & 0 deletions b/‎scripts/staging/llm-bench/results/systemds_qwen3b_embeddings_reverse/run_config.json‎
Lines changed: 13 additions & 0 deletions
@@ -0,0 +1,45 @@
+{
+  "git_commit_hash": "a692e00fa32d251dc7b1fba7a219f6820cfa319d",
+  "timestamp_utc": "2026-03-05T04:33:40.663964+00:00",
+  "python_version": "3.12.3 (main, Aug 14 2025, 17:47:21) [GCC 13.3.0]",
+  "platform": {
+    "os": "Linux",
+    "architecture": "x86_64"
+  },
+  "backend": "systemds",
+  "model": "Qwen/Qwen2.5-3B-Instruct",
+  "workload_config_path": "/home/kubraaksu/systemds/scripts/staging/llm-bench/workloads/embeddings/config.yaml",
+  "workload_config_sha256": "2d1ce87f23c894dd956b4354f78df96dcc271d192ed3d0d6f048eeb72b006c88",
+  "gpu": {
+    "gpu_count": 3,
+    "gpus": [
+      {
+        "index": 0,
+        "name": "NVIDIA H100 PCIe",
+        "memory_total_mb": 81559.0,
+        "memory_used_mb": 483.125,
+        "memory_free_mb": 81075.875,
+        "gpu_utilization_pct": 99,
+        "memory_utilization_pct": 22
+      },
+      {
+        "index": 1,
+        "name": "NVIDIA H100 PCIe",
+        "memory_total_mb": 81559.0,
+        "memory_used_mb": 74730.6875,
+        "memory_free_mb": 6828.3125,
+        "gpu_utilization_pct": 0,
+        "memory_utilization_pct": 0
+      },
+      {
+        "index": 2,
+        "name": "NVIDIA H100 PCIe",
+        "memory_total_mb": 81559.0,
+        "memory_used_mb": 483.125,
+        "memory_free_mb": 81075.875,
+        "gpu_utilization_pct": 0,
+        "memory_utilization_pct": 0
+      }
+    ]
+  }
+}
@@ -0,0 +1,90 @@
+{
+  "n": 50.0,
+  "latency_ms_mean": 60.18735467921942,
+  "latency_ms_std": 6.098655589554143,
+  "latency_ms_min": 55.26735467921942,
+  "latency_ms_max": 97.26735467921942,
+  "latency_ms_p50": 58.26735467921942,
+  "latency_ms_p95": 63.26735467921942,
+  "latency_ms_cv": 0.10132785569424262,
+  "throughput_req_per_s": 15.879617740218775,
+  "accuracy_mean": 0.9,
+  "accuracy_count": "45/50",
+  "pearson_r": 0.9172814516781994,
+  "pearson_n": 50,
+  "total_input_tokens": 3589,
+  "total_output_tokens": 250,
+  "total_tokens": 3839,
+  "electricity_kwh": 0.00030612267818004406,
+  "electricity_cost_usd": 9.183680345401322e-05,
+  "hardware_amortization_usd": 0.0017492724467431092,
+  "total_compute_cost_usd": 0.0018411092501971224,
+  "memory_mb_initial": 139.0,
+  "memory_mb_peak": 158.0,
+  "memory_mb_avg": 155.28571428571428,
+  "cpu_percent_avg": 3.1285714285714286,
+  "gpu_info": {
+    "gpu_count": 3,
+    "gpus": [
+      {
+        "index": 0,
+        "name": "NVIDIA H100 PCIe",
+        "memory_total_mb": 81559.0,
+        "memory_used_mb": 483.125,
+        "memory_free_mb": 81075.875,
+        "gpu_utilization_pct": 99,
+        "memory_utilization_pct": 22
+      },
+      {
+        "index": 1,
+        "name": "NVIDIA H100 PCIe",
+        "memory_total_mb": 81559.0,
+        "memory_used_mb": 74730.6875,
+        "memory_free_mb": 6828.3125,
+        "gpu_utilization_pct": 0,
+        "memory_utilization_pct": 0
+      },
+      {
+        "index": 2,
+        "name": "NVIDIA H100 PCIe",
+        "memory_total_mb": 81559.0,
+        "memory_used_mb": 483.125,
+        "memory_free_mb": 81075.875,
+        "gpu_utilization_pct": 0,
+        "memory_utilization_pct": 0
+      }
+    ]
+  },
+  "gpu_after": {
+    "gpu_count": 3,
+    "gpus": [
+      {
+        "index": 0,
+        "name": "NVIDIA H100 PCIe",
+        "memory_total_mb": 81559.0,
+        "memory_used_mb": 483.125,
+        "memory_free_mb": 81075.875,
+        "gpu_utilization_pct": 99,
+        "memory_utilization_pct": 22
+      },
+      {
+        "index": 1,
+        "name": "NVIDIA H100 PCIe",
+        "memory_total_mb": 81559.0,
+        "memory_used_mb": 74730.6875,
+        "memory_free_mb": 6828.3125,
+        "gpu_utilization_pct": 0,
+        "memory_utilization_pct": 0
+      },
+      {
+        "index": 2,
+        "name": "NVIDIA H100 PCIe",
+        "memory_total_mb": 81559.0,
+        "memory_used_mb": 483.125,
+        "memory_free_mb": 81075.875,
+        "gpu_utilization_pct": 0,
+        "memory_utilization_pct": 0
+      }
+    ]
+  }
+}
@@ -0,0 +1,13 @@
+{
+  "backend": "systemds",
+  "backend_model": "Qwen/Qwen2.5-3B-Instruct",
+  "workload": "embeddings",
+  "concurrency": 1,
+  "max_tokens": 16,
+  "temperature": 0.0,
+  "top_p": 0.9,
+  "n_samples": 50,
+  "timestamp": "2026-03-05T04:33:40.654560+00:00",
+  "python_version": "3.12.3",
+  "platform": "Linux-6.8.0-83-generic-x86_64-with-glibc2.39"
+}
@@ -0,0 +1,45 @@
+{
+  "git_commit_hash": "a692e00fa32d251dc7b1fba7a219f6820cfa319d",
+  "timestamp_utc": "2026-03-05T04:39:34.607433+00:00",
+  "python_version": "3.12.3 (main, Aug 14 2025, 17:47:21) [GCC 13.3.0]",
+  "platform": {
+    "os": "Linux",
+    "architecture": "x86_64"
+  },
+  "backend": "systemds",
+  "model": "Qwen/Qwen2.5-3B-Instruct",
+  "workload_config_path": "/home/kubraaksu/systemds/scripts/staging/llm-bench/workloads/embeddings/config.yaml",
+  "workload_config_sha256": "2d1ce87f23c894dd956b4354f78df96dcc271d192ed3d0d6f048eeb72b006c88",
+  "gpu": {
+    "gpu_count": 3,
+    "gpus": [
+      {
+        "index": 0,
+        "name": "NVIDIA H100 PCIe",
+        "memory_total_mb": 81559.0,
+        "memory_used_mb": 483.125,
+        "memory_free_mb": 81075.875,
+        "gpu_utilization_pct": 99,
+        "memory_utilization_pct": 22
+      },
+      {
+        "index": 1,
+        "name": "NVIDIA H100 PCIe",
+        "memory_total_mb": 81559.0,
+        "memory_used_mb": 74730.6875,
+        "memory_free_mb": 6828.3125,
+        "gpu_utilization_pct": 0,
+        "memory_utilization_pct": 0
+      },
+      {
+        "index": 2,
+        "name": "NVIDIA H100 PCIe",
+        "memory_total_mb": 81559.0,
+        "memory_used_mb": 483.125,
+        "memory_free_mb": 81075.875,
+        "gpu_utilization_pct": 0,
+        "memory_utilization_pct": 0
+      }
+    ]
+  }
+}
@@ -0,0 +1,90 @@
+{
+  "n": 50.0,
+  "latency_ms_mean": 59.78951898403466,
+  "latency_ms_std": 6.557560522023414,
+  "latency_ms_min": 50.509518984034656,
+  "latency_ms_max": 94.50951898403466,
+  "latency_ms_p50": 60.509518984034656,
+  "latency_ms_p95": 64.05951898403465,
+  "latency_ms_cv": 0.10967742563331964,
+  "throughput_req_per_s": 16.0204517240011,
+  "accuracy_mean": 0.9,
+  "accuracy_count": "45/50",
+  "pearson_r": 0.9172814516781994,
+  "pearson_n": 50,
+  "total_input_tokens": 3589,
+  "total_output_tokens": 250,
+  "total_tokens": 3839,
+  "electricity_kwh": 0.00030343158825093673,
+  "electricity_cost_usd": 9.102947647528102e-05,
+  "hardware_amortization_usd": 0.0017338947900053527,
+  "total_compute_cost_usd": 0.0018249242664806337,
+  "memory_mb_initial": 140.0,
+  "memory_mb_peak": 159.0,
+  "memory_mb_avg": 156.28571428571428,
+  "cpu_percent_avg": 1.7142857142857142,
+  "gpu_info": {
+    "gpu_count": 3,
+    "gpus": [
+      {
+        "index": 0,
+        "name": "NVIDIA H100 PCIe",
+        "memory_total_mb": 81559.0,
+        "memory_used_mb": 483.125,
+        "memory_free_mb": 81075.875,
+        "gpu_utilization_pct": 99,
+        "memory_utilization_pct": 22
+      },
+      {
+        "index": 1,
+        "name": "NVIDIA H100 PCIe",
+        "memory_total_mb": 81559.0,
+        "memory_used_mb": 74730.6875,
+        "memory_free_mb": 6828.3125,
+        "gpu_utilization_pct": 0,
+        "memory_utilization_pct": 0
+      },
+      {
+        "index": 2,
+        "name": "NVIDIA H100 PCIe",
+        "memory_total_mb": 81559.0,
+        "memory_used_mb": 483.125,
+        "memory_free_mb": 81075.875,
+        "gpu_utilization_pct": 0,
+        "memory_utilization_pct": 0
+      }
+    ]
+  },
+  "gpu_after": {
+    "gpu_count": 3,
+    "gpus": [
+      {
+        "index": 0,
+        "name": "NVIDIA H100 PCIe",
+        "memory_total_mb": 81559.0,
+        "memory_used_mb": 483.125,
+        "memory_free_mb": 81075.875,
+        "gpu_utilization_pct": 99,
+        "memory_utilization_pct": 22
+      },
+      {
+        "index": 1,
+        "name": "NVIDIA H100 PCIe",
+        "memory_total_mb": 81559.0,
+        "memory_used_mb": 74730.6875,
+        "memory_free_mb": 6828.3125,
+        "gpu_utilization_pct": 0,
+        "memory_utilization_pct": 0
+      },
+      {
+        "index": 2,
+        "name": "NVIDIA H100 PCIe",
+        "memory_total_mb": 81559.0,
+        "memory_used_mb": 483.125,
+        "memory_free_mb": 81075.875,
+        "gpu_utilization_pct": 0,
+        "memory_utilization_pct": 0
+      }
+    ]
+  }
+}
@@ -0,0 +1,13 @@
+{
+  "backend": "systemds",
+  "backend_model": "Qwen/Qwen2.5-3B-Instruct",
+  "workload": "embeddings",
+  "concurrency": 1,
+  "max_tokens": 16,
+  "temperature": 0.0,
+  "top_p": 0.9,
+  "n_samples": 50,
+  "timestamp": "2026-03-05T04:39:34.597020+00:00",
+  "python_version": "3.12.3",
+  "platform": "Linux-6.8.0-83-generic-x86_64-with-glibc2.39"
+}