|
2 | 2 | "cells": [
|
3 | 3 | {
|
4 | 4 | "cell_type": "code",
|
5 |
| - "execution_count": 9, |
| 5 | + "execution_count": 1, |
6 | 6 | "metadata": {},
|
7 | 7 | "outputs": [],
|
8 | 8 | "source": [
|
|
80 | 80 | },
|
81 | 81 | {
|
82 | 82 | "cell_type": "code",
|
83 |
| - "execution_count": 3, |
| 83 | + "execution_count": 2, |
84 | 84 | "metadata": {},
|
85 | 85 | "outputs": [],
|
86 | 86 | "source": [
|
|
134 | 134 | },
|
135 | 135 | {
|
136 | 136 | "cell_type": "code",
|
137 |
| - "execution_count": 25, |
| 137 | + "execution_count": 9, |
138 | 138 | "metadata": {},
|
139 | 139 | "outputs": [],
|
140 | 140 | "source": [
|
141 | 141 | "import time\n",
|
142 | 142 | "import numpy as np\n",
|
143 | 143 | "\n",
|
| 144 | + "import torch.backends.cudnn as cudnn\n", |
| 145 | + "cudnn.benchmark = True\n", |
| 146 | + "\n", |
144 | 147 | "def benchmark(model, input_shape=(1024, 1, 32, 32), dtype='fp32', nwarmup=50, nruns=10000):\n",
|
145 | 148 | " input_data = torch.randn(input_shape)\n",
|
146 | 149 | " input_data = input_data.to(\"cuda\")\n",
|
147 | 150 | " if dtype=='fp16':\n",
|
148 | 151 | " input_data = input_data.half()\n",
|
149 | 152 | " \n",
|
150 |
| - " for _ in range(nwarmup):\n", |
151 |
| - " results = model(input_data)\n", |
| 153 | + " print(\"Warm up ...\")\n", |
| 154 | + " with torch.no_grad():\n", |
| 155 | + " for _ in range(nwarmup):\n", |
| 156 | + " features = model(input_data)\n", |
| 157 | + " torch.cuda.synchronize()\n", |
| 158 | + " print(\"Start timing ...\")\n", |
| 159 | + " timings = []\n", |
| 160 | + " with torch.no_grad():\n", |
| 161 | + " for i in range(1, nruns+1):\n", |
| 162 | + " start_time = time.time()\n", |
| 163 | + " features = model(input_data)\n", |
| 164 | + " torch.cuda.synchronize()\n", |
| 165 | + " end_time = time.time()\n", |
| 166 | + " timings.append(end_time - start_time)\n", |
| 167 | + " if i%1000==0:\n", |
| 168 | + " print('Iteration %d/%d, ave batch time %.2f ms'%(i, nruns, np.mean(timings)*1000))\n", |
| 169 | + "\n", |
| 170 | + " print(\"Input shape:\", input_data.size())\n", |
| 171 | + " print(\"Output features size:\", features.size())\n", |
152 | 172 | " \n",
|
153 |
| - " start_time = time.time()\n", |
154 |
| - " time_arr = []\n", |
155 |
| - " for _ in range(1, nruns+1):\n", |
156 |
| - " start_time = time.time()\n", |
157 |
| - " results = model(input_data)\n", |
158 |
| - " time_arr.append(time.time() - start_time)\n", |
159 |
| - " \n", |
160 |
| - " if _%1000==0:\n", |
161 |
| - " print('Iteration %d, ave batch time %.2f ms'%(_, np.mean(time_arr)*1000))\n", |
162 |
| - " \n", |
163 |
| - " print('Average batch time: %.2f ms'%(np.mean(time_arr)*1000)) " |
| 173 | + " print('Average batch time: %.2f ms'%(np.mean(timings)*1000))\n", |
| 174 | + " " |
164 | 175 | ]
|
165 | 176 | },
|
166 | 177 | {
|
|
172 | 183 | },
|
173 | 184 | {
|
174 | 185 | "cell_type": "code",
|
175 |
| - "execution_count": 26, |
| 186 | + "execution_count": 10, |
176 | 187 | "metadata": {},
|
177 | 188 | "outputs": [
|
178 | 189 | {
|
|
191 | 202 | ")"
|
192 | 203 | ]
|
193 | 204 | },
|
194 |
| - "execution_count": 26, |
| 205 | + "execution_count": 10, |
195 | 206 | "metadata": {},
|
196 | 207 | "output_type": "execute_result"
|
197 | 208 | }
|
|
203 | 214 | },
|
204 | 215 | {
|
205 | 216 | "cell_type": "code",
|
206 |
| - "execution_count": 27, |
| 217 | + "execution_count": 11, |
207 | 218 | "metadata": {},
|
208 | 219 | "outputs": [
|
209 | 220 | {
|
210 | 221 | "name": "stdout",
|
211 | 222 | "output_type": "stream",
|
212 | 223 | "text": [
|
213 |
| - "Iteration 1000, ave batch time 0.91 ms\n", |
214 |
| - "Iteration 2000, ave batch time 0.90 ms\n", |
215 |
| - "Iteration 3000, ave batch time 0.90 ms\n", |
216 |
| - "Iteration 4000, ave batch time 0.90 ms\n", |
217 |
| - "Iteration 5000, ave batch time 0.90 ms\n", |
218 |
| - "Iteration 6000, ave batch time 0.90 ms\n", |
219 |
| - "Iteration 7000, ave batch time 0.90 ms\n", |
220 |
| - "Iteration 8000, ave batch time 0.90 ms\n", |
221 |
| - "Iteration 9000, ave batch time 0.90 ms\n", |
222 |
| - "Iteration 10000, ave batch time 0.90 ms\n", |
223 |
| - "Average batch time: 0.90 ms\n" |
| 224 | + "Warm up ...\n", |
| 225 | + "Start timing ...\n", |
| 226 | + "Iteration 1000/10000, ave batch time 0.93 ms\n", |
| 227 | + "Iteration 2000/10000, ave batch time 0.93 ms\n", |
| 228 | + "Iteration 3000/10000, ave batch time 0.93 ms\n", |
| 229 | + "Iteration 4000/10000, ave batch time 0.93 ms\n", |
| 230 | + "Iteration 5000/10000, ave batch time 0.93 ms\n", |
| 231 | + "Iteration 6000/10000, ave batch time 0.93 ms\n", |
| 232 | + "Iteration 7000/10000, ave batch time 0.93 ms\n", |
| 233 | + "Iteration 8000/10000, ave batch time 0.93 ms\n", |
| 234 | + "Iteration 9000/10000, ave batch time 0.93 ms\n", |
| 235 | + "Iteration 10000/10000, ave batch time 0.93 ms\n", |
| 236 | + "Input shape: torch.Size([1024, 1, 32, 32])\n", |
| 237 | + "Output features size: torch.Size([1024, 10])\n", |
| 238 | + "Average batch time: 0.93 ms\n" |
224 | 239 | ]
|
225 | 240 | }
|
226 | 241 | ],
|
|
241 | 256 | },
|
242 | 257 | {
|
243 | 258 | "cell_type": "code",
|
244 |
| - "execution_count": 29, |
| 259 | + "execution_count": 12, |
245 | 260 | "metadata": {},
|
246 | 261 | "outputs": [
|
247 | 262 | {
|
|
263 | 278 | ")"
|
264 | 279 | ]
|
265 | 280 | },
|
266 |
| - "execution_count": 29, |
| 281 | + "execution_count": 12, |
267 | 282 | "metadata": {},
|
268 | 283 | "output_type": "execute_result"
|
269 | 284 | }
|
|
275 | 290 | },
|
276 | 291 | {
|
277 | 292 | "cell_type": "code",
|
278 |
| - "execution_count": 30, |
| 293 | + "execution_count": 13, |
279 | 294 | "metadata": {},
|
280 | 295 | "outputs": [
|
281 | 296 | {
|
282 | 297 | "name": "stdout",
|
283 | 298 | "output_type": "stream",
|
284 | 299 | "text": [
|
285 |
| - "Iteration 1000, ave batch time 0.66 ms\n", |
286 |
| - "Iteration 2000, ave batch time 0.66 ms\n", |
287 |
| - "Iteration 3000, ave batch time 0.66 ms\n", |
288 |
| - "Iteration 4000, ave batch time 0.65 ms\n", |
289 |
| - "Iteration 5000, ave batch time 0.65 ms\n", |
290 |
| - "Iteration 6000, ave batch time 0.65 ms\n", |
291 |
| - "Iteration 7000, ave batch time 0.65 ms\n", |
292 |
| - "Iteration 8000, ave batch time 0.65 ms\n", |
293 |
| - "Iteration 9000, ave batch time 0.65 ms\n", |
294 |
| - "Iteration 10000, ave batch time 0.65 ms\n", |
295 |
| - "Average batch time: 0.65 ms\n" |
| 300 | + "Warm up ...\n", |
| 301 | + "Start timing ...\n", |
| 302 | + "Iteration 1000/10000, ave batch time 0.68 ms\n", |
| 303 | + "Iteration 2000/10000, ave batch time 0.68 ms\n", |
| 304 | + "Iteration 3000/10000, ave batch time 0.68 ms\n", |
| 305 | + "Iteration 4000/10000, ave batch time 0.68 ms\n", |
| 306 | + "Iteration 5000/10000, ave batch time 0.68 ms\n", |
| 307 | + "Iteration 6000/10000, ave batch time 0.68 ms\n", |
| 308 | + "Iteration 7000/10000, ave batch time 0.68 ms\n", |
| 309 | + "Iteration 8000/10000, ave batch time 0.68 ms\n", |
| 310 | + "Iteration 9000/10000, ave batch time 0.68 ms\n", |
| 311 | + "Iteration 10000/10000, ave batch time 0.68 ms\n", |
| 312 | + "Input shape: torch.Size([1024, 1, 32, 32])\n", |
| 313 | + "Output features size: torch.Size([1024, 10])\n", |
| 314 | + "Average batch time: 0.68 ms\n" |
296 | 315 | ]
|
297 | 316 | }
|
298 | 317 | ],
|
|
311 | 330 | },
|
312 | 331 | {
|
313 | 332 | "cell_type": "code",
|
314 |
| - "execution_count": 32, |
| 333 | + "execution_count": 14, |
315 | 334 | "metadata": {},
|
316 | 335 | "outputs": [],
|
317 | 336 | "source": [
|
|
321 | 340 | },
|
322 | 341 | {
|
323 | 342 | "cell_type": "code",
|
324 |
| - "execution_count": 33, |
| 343 | + "execution_count": 15, |
325 | 344 | "metadata": {},
|
326 | 345 | "outputs": [
|
327 | 346 | {
|
|
343 | 362 | ")"
|
344 | 363 | ]
|
345 | 364 | },
|
346 |
| - "execution_count": 33, |
| 365 | + "execution_count": 15, |
347 | 366 | "metadata": {},
|
348 | 367 | "output_type": "execute_result"
|
349 | 368 | }
|
|
354 | 373 | },
|
355 | 374 | {
|
356 | 375 | "cell_type": "code",
|
357 |
| - "execution_count": 35, |
| 376 | + "execution_count": 16, |
358 | 377 | "metadata": {},
|
359 | 378 | "outputs": [
|
360 | 379 | {
|
361 | 380 | "name": "stdout",
|
362 | 381 | "output_type": "stream",
|
363 | 382 | "text": [
|
364 |
| - "Iteration 1000, ave batch time 0.64 ms\n", |
365 |
| - "Iteration 2000, ave batch time 0.65 ms\n", |
366 |
| - "Iteration 3000, ave batch time 0.65 ms\n", |
367 |
| - "Iteration 4000, ave batch time 0.64 ms\n", |
368 |
| - "Iteration 5000, ave batch time 0.64 ms\n", |
369 |
| - "Iteration 6000, ave batch time 0.64 ms\n", |
370 |
| - "Iteration 7000, ave batch time 0.64 ms\n", |
371 |
| - "Iteration 8000, ave batch time 0.64 ms\n", |
372 |
| - "Iteration 9000, ave batch time 0.64 ms\n", |
373 |
| - "Iteration 10000, ave batch time 0.64 ms\n", |
374 |
| - "Average batch time: 0.64 ms\n" |
| 383 | + "Warm up ...\n", |
| 384 | + "Start timing ...\n", |
| 385 | + "Iteration 1000/10000, ave batch time 0.68 ms\n", |
| 386 | + "Iteration 2000/10000, ave batch time 0.68 ms\n", |
| 387 | + "Iteration 3000/10000, ave batch time 0.68 ms\n", |
| 388 | + "Iteration 4000/10000, ave batch time 0.68 ms\n", |
| 389 | + "Iteration 5000/10000, ave batch time 0.68 ms\n", |
| 390 | + "Iteration 6000/10000, ave batch time 0.68 ms\n", |
| 391 | + "Iteration 7000/10000, ave batch time 0.68 ms\n", |
| 392 | + "Iteration 8000/10000, ave batch time 0.68 ms\n", |
| 393 | + "Iteration 9000/10000, ave batch time 0.68 ms\n", |
| 394 | + "Iteration 10000/10000, ave batch time 0.68 ms\n", |
| 395 | + "Input shape: torch.Size([1024, 1, 32, 32])\n", |
| 396 | + "Output features size: torch.Size([1024, 10])\n", |
| 397 | + "Average batch time: 0.68 ms\n" |
375 | 398 | ]
|
376 | 399 | }
|
377 | 400 | ],
|
|
398 | 421 | },
|
399 | 422 | {
|
400 | 423 | "cell_type": "code",
|
401 |
| - "execution_count": 8, |
| 424 | + "execution_count": 17, |
402 | 425 | "metadata": {},
|
403 | 426 | "outputs": [],
|
404 | 427 | "source": [
|
|
427 | 450 | },
|
428 | 451 | {
|
429 | 452 | "cell_type": "code",
|
430 |
| - "execution_count": 37, |
| 453 | + "execution_count": 18, |
431 | 454 | "metadata": {},
|
432 | 455 | "outputs": [
|
433 | 456 | {
|
434 | 457 | "name": "stdout",
|
435 | 458 | "output_type": "stream",
|
436 | 459 | "text": [
|
437 |
| - "Iteration 1000, ave batch time 0.19 ms\n", |
438 |
| - "Iteration 2000, ave batch time 0.18 ms\n", |
439 |
| - "Iteration 3000, ave batch time 0.19 ms\n", |
440 |
| - "Iteration 4000, ave batch time 0.19 ms\n", |
441 |
| - "Iteration 5000, ave batch time 0.19 ms\n", |
442 |
| - "Iteration 6000, ave batch time 0.19 ms\n", |
443 |
| - "Iteration 7000, ave batch time 0.19 ms\n", |
444 |
| - "Iteration 8000, ave batch time 0.19 ms\n", |
445 |
| - "Iteration 9000, ave batch time 0.19 ms\n", |
446 |
| - "Iteration 10000, ave batch time 0.19 ms\n", |
447 |
| - "Average batch time: 0.19 ms\n" |
| 460 | + "Warm up ...\n", |
| 461 | + "Start timing ...\n", |
| 462 | + "Iteration 1000/10000, ave batch time 0.24 ms\n", |
| 463 | + "Iteration 2000/10000, ave batch time 0.24 ms\n", |
| 464 | + "Iteration 3000/10000, ave batch time 0.24 ms\n", |
| 465 | + "Iteration 4000/10000, ave batch time 0.24 ms\n", |
| 466 | + "Iteration 5000/10000, ave batch time 0.24 ms\n", |
| 467 | + "Iteration 6000/10000, ave batch time 0.24 ms\n", |
| 468 | + "Iteration 7000/10000, ave batch time 0.23 ms\n", |
| 469 | + "Iteration 8000/10000, ave batch time 0.23 ms\n", |
| 470 | + "Iteration 9000/10000, ave batch time 0.23 ms\n", |
| 471 | + "Iteration 10000/10000, ave batch time 0.23 ms\n", |
| 472 | + "Input shape: torch.Size([1024, 1, 32, 32])\n", |
| 473 | + "Output features size: torch.Size([1, 10])\n", |
| 474 | + "Average batch time: 0.23 ms\n" |
448 | 475 | ]
|
449 | 476 | }
|
450 | 477 | ],
|
|
463 | 490 | },
|
464 | 491 | {
|
465 | 492 | "cell_type": "code",
|
466 |
| - "execution_count": 39, |
| 493 | + "execution_count": 19, |
467 | 494 | "metadata": {},
|
468 | 495 | "outputs": [],
|
469 | 496 | "source": [
|
|
492 | 519 | },
|
493 | 520 | {
|
494 | 521 | "cell_type": "code",
|
495 |
| - "execution_count": 40, |
| 522 | + "execution_count": 20, |
496 | 523 | "metadata": {},
|
497 | 524 | "outputs": [
|
498 | 525 | {
|
499 | 526 | "name": "stdout",
|
500 | 527 | "output_type": "stream",
|
501 | 528 | "text": [
|
502 |
| - "Iteration 1000, ave batch time 0.18 ms\n", |
503 |
| - "Iteration 2000, ave batch time 0.18 ms\n", |
504 |
| - "Iteration 3000, ave batch time 0.18 ms\n", |
505 |
| - "Iteration 4000, ave batch time 0.18 ms\n", |
506 |
| - "Iteration 5000, ave batch time 0.18 ms\n", |
507 |
| - "Iteration 6000, ave batch time 0.18 ms\n", |
508 |
| - "Iteration 7000, ave batch time 0.18 ms\n", |
509 |
| - "Iteration 8000, ave batch time 0.18 ms\n", |
510 |
| - "Iteration 9000, ave batch time 0.18 ms\n", |
511 |
| - "Iteration 10000, ave batch time 0.18 ms\n", |
512 |
| - "Average batch time: 0.18 ms\n" |
| 529 | + "Warm up ...\n", |
| 530 | + "Start timing ...\n", |
| 531 | + "Iteration 1000/10000, ave batch time 0.20 ms\n", |
| 532 | + "Iteration 2000/10000, ave batch time 0.20 ms\n", |
| 533 | + "Iteration 3000/10000, ave batch time 0.20 ms\n", |
| 534 | + "Iteration 4000/10000, ave batch time 0.21 ms\n", |
| 535 | + "Iteration 5000/10000, ave batch time 0.21 ms\n", |
| 536 | + "Iteration 6000/10000, ave batch time 0.21 ms\n", |
| 537 | + "Iteration 7000/10000, ave batch time 0.21 ms\n", |
| 538 | + "Iteration 8000/10000, ave batch time 0.21 ms\n", |
| 539 | + "Iteration 9000/10000, ave batch time 0.21 ms\n", |
| 540 | + "Iteration 10000/10000, ave batch time 0.21 ms\n", |
| 541 | + "Input shape: torch.Size([1024, 1, 32, 32])\n", |
| 542 | + "Output features size: torch.Size([1, 10])\n", |
| 543 | + "Average batch time: 0.21 ms\n" |
513 | 544 | ]
|
514 | 545 | }
|
515 | 546 | ],
|
|
0 commit comments