Skip to content

Commit ae50ec2

Browse files
authored
feat(benchmarks): add Jupyter Notebook for results exploitation (#286)
Add a Jupyter Notebook that fetches benchmark results and transforms them into a pandas `DataFrame` and a CSV file.
1 parent 066134b commit ae50ec2

File tree

2 files changed

+222
-0
lines changed

2 files changed

+222
-0
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,5 +33,8 @@ multirun/
3333
# Unit test / coverage reports
3434
.hypothesis/
3535

36+
# Jupyter Notebook
37+
.ipynb_checkpoints
38+
3639
# Prevent publishing file with third party licenses
3740
THIRD-PARTY-LICENSES
Lines changed: 219 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,219 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"id": "36f63340-79b9-4f61-a6a3-c4883071c0b3",
6+
"metadata": {},
7+
"source": [
8+
"# Benchmark results aggregator\n",
9+
"\n",
10+
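"This notebook fetches the benchmark results stored in a DynamoDB table for two versions of S3 Connector for PyTorch, aggregates them into a pandas `DataFrame`, and exports the result to a CSV file."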
11+
]
12+
},
13+
{
14+
"cell_type": "code",
15+
"execution_count": 4,
16+
"id": "6522fc8a931ffbc3",
17+
"metadata": {
18+
"ExecuteTime": {
19+
"end_time": "2024-12-17T16:15:52.368674Z",
20+
"start_time": "2024-12-17T16:15:51.425605Z"
21+
}
22+
},
23+
"outputs": [],
24+
"source": [
25+
"%%capture --no-display\n",
26+
"%load_ext autoreload\n",
27+
"%autoreload 2\n",
28+
"\n",
29+
"%pip install -q boto3 numpy pandas python-dotenv openpyxl"
30+
]
31+
},
32+
{
33+
"cell_type": "markdown",
34+
"id": "d4a896ce-8398-4d20-8270-7f5b77206d2b",
35+
"metadata": {},
36+
"source": [
37+
"### Initialization (imports and constants)"
38+
]
39+
},
40+
{
41+
"cell_type": "code",
42+
"execution_count": 5,
43+
"id": "a371fc9062af6126",
44+
"metadata": {
45+
"ExecuteTime": {
46+
"end_time": "2024-12-17T16:15:52.388081Z",
47+
"start_time": "2024-12-17T16:15:52.375379Z"
48+
}
49+
},
50+
"outputs": [],
51+
"source": [
52+
"import os\n",
53+
"\n",
54+
"from dotenv import load_dotenv\n",
55+
"\n",
56+
"# Define the environment variables below in a \".env\" file: `load_dotenv()`\n",
57+
"# will source them automatically.\n",
58+
"load_dotenv()\n",
59+
"\n",
60+
"# AWS region and name of the DynamoDB table where the benchmark results are stored.\n",
61+
"REGION = os.environ.get(\"DYNAMODB_REGION\")\n",
62+
"TABLE = os.environ.get(\"DYNAMODB_TABLE\")\n",
63+
"\n",
64+
"# S3 Connector for PyTorch versions to query, to compare benchmark results.\n",
65+
"PREVIOUS_VERSION = \"1.2.7\"\n",
66+
"NEXT_VERSION = \"1.3.0\""
67+
]
68+
},
69+
{
70+
"cell_type": "markdown",
71+
"id": "b06aa712-e2c1-48ea-8cc1-a8cd14701cf9",
72+
"metadata": {},
73+
"source": [
74+
"### Functions"
75+
]
76+
},
77+
{
78+
"cell_type": "code",
79+
"execution_count": 9,
80+
"id": "e14b9efad6ae3ad6",
81+
"metadata": {
82+
"ExecuteTime": {
83+
"end_time": "2024-12-17T16:16:16.363274Z",
84+
"start_time": "2024-12-17T16:16:16.348512Z"
85+
}
86+
},
87+
"outputs": [],
88+
"source": [
89+
"from datetime import datetime\n",
90+
"from typing import List\n",
91+
"\n",
92+
"import numpy as np\n",
93+
"import boto3\n",
94+
"\n",
95+
"\n",
96+
def query_dynamodb(
    region: str, table_name: str, old_version: str, new_version: str
) -> List[dict]:
    """Fetch every benchmark run recorded for the two given versions.

    Runs a PartiQL ``SELECT`` against ``table_name`` and follows ``NextToken``
    so that results spanning several response pages are all returned (a single
    ``execute_statement`` call only yields the first page).

    Args:
        region: AWS region, passed to boto3 as ``region_name``.
        table_name: Name of the DynamoDB table to query.
        old_version: First ``s3torchconnector_version`` value to match.
        new_version: Second ``s3torchconnector_version`` value to match.

    Returns:
        The concatenated ``Items`` of all response pages.
    """
    client = boto3.resource("dynamodb", region_name=region).meta.client

    # The table name is interpolated into the statement text; only the two
    # version values are bound as statement parameters.
    statement = f'SELECT * FROM "{table_name}" WHERE s3torchconnector_version IN [?, ?]'
    request = {"Statement": statement, "Parameters": [old_version, new_version]}

    items: List[dict] = []
    while True:
        response = client.execute_statement(**request)
        items.extend(response.get("Items", []))

        next_token = response.get("NextToken")
        if not next_token:
            return items
        # Ask for the page following the one just received.
        request["NextToken"] = next_token
109+
"\n",
110+
"\n",
111+
def transform(run_results: List[dict]) -> List[dict]:
    """Build a list of rows, one per job result, to be loaded into a :class:`pd.DataFrame`.

    Every metric of a job except ``"utilization"`` is averaged into a ``float``
    and merged into the row next to the run-level fields.

    Args:
        run_results: Run items to flatten. Each one must provide
            ``s3torchconnector_version``, ``scenario``, ``timestamp_utc`` and
            ``job_results``; ``disambiguator`` is optional. Each job result must
            provide ``metrics`` and ``config``.

    Returns:
        One dict per job result with the keys ``version``, ``scenario``,
        ``disambiguator``, ``timestamp_utc``, one key per averaged metric, and
        ``config``.
    """
    # Function-scope import so the block does not depend on the cell's import line.
    from datetime import timezone

    rows = []
    for run_result in run_results:
        for job_result in run_result["job_results"]:
            metrics_averaged = {
                name: float(np.mean(values))  # `float()` to cast away the `Decimal` part
                for name, values in job_result["metrics"].items()
                if name != "utilization"  # NOTE(review): presumably not a flat list of numbers - confirm
            }
            # Convert the POSIX timestamp in UTC: without `tz`, `fromtimestamp`
            # returns the machine's local time, which would contradict the
            # column name. The tzinfo is dropped afterwards so the value stays a
            # naive datetime, as before.
            timestamp_utc = datetime.fromtimestamp(
                float(run_result["timestamp_utc"]), tz=timezone.utc
            ).replace(tzinfo=None)
            rows.append(
                {
                    "version": run_result["s3torchconnector_version"],
                    "scenario": run_result["scenario"],
                    "disambiguator": run_result.get("disambiguator"),
                    "timestamp_utc": timestamp_utc,
                    **metrics_averaged,
                    "config": job_result["config"],
                }
            )

    return rows
134+
]
135+
},
136+
{
137+
"cell_type": "markdown",
138+
"id": "94f68eef52fb0b5c",
139+
"metadata": {},
140+
"source": [
141+
"### Explore and export data"
142+
]
143+
},
144+
{
145+
"cell_type": "code",
146+
"execution_count": 10,
147+
"id": "be008fb6acf09055",
148+
"metadata": {
149+
"ExecuteTime": {
150+
"end_time": "2024-12-17T16:16:18.143297Z",
151+
"start_time": "2024-12-17T16:16:18.040538Z"
152+
}
153+
},
154+
"outputs": [],
155+
"source": [
156+
"_run_results = query_dynamodb(REGION, TABLE, PREVIOUS_VERSION, NEXT_VERSION)"
157+
]
158+
},
159+
{
160+
"cell_type": "code",
161+
"execution_count": null,
162+
"id": "ac3597a1",
163+
"metadata": {
164+
"ExecuteTime": {
165+
"end_time": "2024-12-17T16:16:18.808673Z",
166+
"start_time": "2024-12-17T16:16:18.782056Z"
167+
}
168+
},
169+
"outputs": [],
170+
"source": [
171+
"import pandas as pd\n",
172+
"\n",
173+
"_data = transform(_run_results)\n",
174+
"_table = pd.json_normalize(_data).set_index(\"version\")\n",
175+
"_table"
176+
]
177+
},
178+
{
179+
"cell_type": "code",
180+
"execution_count": 13,
181+
"id": "b4eed2752e6add17",
182+
"metadata": {
183+
"ExecuteTime": {
184+
"end_time": "2024-12-17T16:16:56.380528Z",
185+
"start_time": "2024-12-17T16:16:56.365683Z"
186+
}
187+
},
188+
"outputs": [],
189+
"source": [
190+
"import string\n",
191+
"import random\n",
192+
"\n",
193+
"_suffix = \"\".join(random.choices(string.ascii_letters, k=5))\n",
194+
"_table.to_csv(f\"benchmark_results_{_suffix}.csv\")"
195+
]
196+
}
197+
],
198+
"metadata": {
199+
"kernelspec": {
200+
"display_name": "venv",
201+
"language": "python",
202+
"name": "python3"
203+
},
204+
"language_info": {
205+
"codemirror_mode": {
206+
"name": "ipython",
207+
"version": 3
208+
},
209+
"file_extension": ".py",
210+
"mimetype": "text/x-python",
211+
"name": "python",
212+
"nbconvert_exporter": "python",
213+
"pygments_lexer": "ipython3",
214+
"version": "3.12.6"
215+
}
216+
},
217+
"nbformat": 4,
218+
"nbformat_minor": 5
219+
}

0 commit comments

Comments
 (0)