@@ -215,43 +215,27 @@
  {
  "cell_type": "code",
  "execution_count": 5,
- "metadata": {
-  "scrolled": false
- },
+ "metadata": {},
  "outputs": [
  {
  "name": "stdout",
  "output_type": "stream",
  "text": [
- "Warm up ...\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/opt/conda/lib/python3.8/site-packages/torch/nn/functional.py:718: UserWarning: Named tensors and all their associated APIs are an experimental feature and subject to change. Please do not use them for anything important until they are released as stable. (Triggered internally at ../c10/core/TensorImpl.h:1153.)\n",
- " return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
+ "Warm up ...\n",
  "Start timing ...\n",
- "Iteration 100/1000, ave batch time 5.14 ms\n",
- "Iteration 200/1000, ave batch time 5.13 ms\n",
- "Iteration 300/1000, ave batch time 5.13 ms\n",
- "Iteration 400/1000, ave batch time 5.13 ms\n",
- "Iteration 500/1000, ave batch time 5.13 ms\n",
- "Iteration 600/1000, ave batch time 5.13 ms\n",
- "Iteration 700/1000, ave batch time 5.13 ms\n",
- "Iteration 800/1000, ave batch time 5.13 ms\n",
- "Iteration 900/1000, ave batch time 5.13 ms\n",
- "Iteration 1000/1000, ave batch time 5.13 ms\n",
+ "Iteration 100/1000, ave batch time 5.22 ms\n",
+ "Iteration 200/1000, ave batch time 5.22 ms\n",
+ "Iteration 300/1000, ave batch time 5.22 ms\n",
+ "Iteration 400/1000, ave batch time 5.22 ms\n",
+ "Iteration 500/1000, ave batch time 5.22 ms\n",
+ "Iteration 600/1000, ave batch time 5.21 ms\n",
+ "Iteration 700/1000, ave batch time 5.22 ms\n",
+ "Iteration 800/1000, ave batch time 5.21 ms\n",
+ "Iteration 900/1000, ave batch time 5.21 ms\n",
+ "Iteration 1000/1000, ave batch time 5.21 ms\n",
  "Input shape: torch.Size([1024, 1, 32, 32])\n",
  "Output features size: torch.Size([1024, 10])\n",
- "Average batch time: 5.13 ms\n"
+ "Average batch time: 5.21 ms\n"
  ]
  }
  ],
@@ -315,19 +299,19 @@
  "text": [
  "Warm up ...\n",
  "Start timing ...\n",
- "Iteration 100/1000, ave batch time 5.13 ms\n",
- "Iteration 200/1000, ave batch time 5.13 ms\n",
- "Iteration 300/1000, ave batch time 5.13 ms\n",
- "Iteration 400/1000, ave batch time 5.13 ms\n",
- "Iteration 500/1000, ave batch time 5.13 ms\n",
- "Iteration 600/1000, ave batch time 5.13 ms\n",
- "Iteration 700/1000, ave batch time 5.13 ms\n",
- "Iteration 800/1000, ave batch time 5.13 ms\n",
- "Iteration 900/1000, ave batch time 5.13 ms\n",
- "Iteration 1000/1000, ave batch time 5.13 ms\n",
+ "Iteration 100/1000, ave batch time 5.22 ms\n",
+ "Iteration 200/1000, ave batch time 5.21 ms\n",
+ "Iteration 300/1000, ave batch time 5.21 ms\n",
+ "Iteration 400/1000, ave batch time 5.21 ms\n",
+ "Iteration 500/1000, ave batch time 5.22 ms\n",
+ "Iteration 600/1000, ave batch time 5.22 ms\n",
+ "Iteration 700/1000, ave batch time 5.22 ms\n",
+ "Iteration 800/1000, ave batch time 5.22 ms\n",
+ "Iteration 900/1000, ave batch time 5.22 ms\n",
+ "Iteration 1000/1000, ave batch time 5.22 ms\n",
  "Input shape: torch.Size([1024, 1, 32, 32])\n",
  "Output features size: torch.Size([1024, 10])\n",
- "Average batch time: 5.13 ms\n"
+ "Average batch time: 5.22 ms\n"
  ]
  }
  ],
@@ -398,19 +382,19 @@
  "text": [
  "Warm up ...\n",
  "Start timing ...\n",
- "Iteration 100/1000, ave batch time 5.16 ms\n",
- "Iteration 200/1000, ave batch time 5.15 ms\n",
- "Iteration 300/1000, ave batch time 5.14 ms\n",
- "Iteration 400/1000, ave batch time 5.14 ms\n",
- "Iteration 500/1000, ave batch time 5.14 ms\n",
- "Iteration 600/1000, ave batch time 5.14 ms\n",
- "Iteration 700/1000, ave batch time 5.14 ms\n",
- "Iteration 800/1000, ave batch time 5.14 ms\n",
- "Iteration 900/1000, ave batch time 5.14 ms\n",
- "Iteration 1000/1000, ave batch time 5.14 ms\n",
+ "Iteration 100/1000, ave batch time 5.21 ms\n",
+ "Iteration 200/1000, ave batch time 5.21 ms\n",
+ "Iteration 300/1000, ave batch time 5.27 ms\n",
+ "Iteration 400/1000, ave batch time 5.28 ms\n",
+ "Iteration 500/1000, ave batch time 5.27 ms\n",
+ "Iteration 600/1000, ave batch time 5.26 ms\n",
+ "Iteration 700/1000, ave batch time 5.26 ms\n",
+ "Iteration 800/1000, ave batch time 5.25 ms\n",
+ "Iteration 900/1000, ave batch time 5.25 ms\n",
+ "Iteration 1000/1000, ave batch time 5.25 ms\n",
  "Input shape: torch.Size([1024, 1, 32, 32])\n",
  "Output features size: torch.Size([1024, 10])\n",
- "Average batch time: 5.14 ms\n"
+ "Average batch time: 5.25 ms\n"
  ]
  }
  ],
@@ -437,9 +421,29 @@
  },
  {
  "cell_type": "code",
- "execution_count": 11,
+ "execution_count": 17,
  "metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "WARNING: [Torch-TensorRT] - For input x.1, found user specified input dtype as Float16, however when inspecting the graph, the input type expected was inferred to be Float\n",
+ "The compiler is going to use the user setting Float16\n",
+ "This conflict may cause an error at runtime due to partial compilation being enabled and therefore\n",
+ "compatibility with PyTorch's data type convention is required.\n",
+ "If you do indeed see errors at runtime either:\n",
+ "- Remove the dtype spec for x.1\n",
+ "- Disable partial compilation by setting require_full_compilation to True\n",
+ "WARNING: [Torch-TensorRT TorchScript Conversion Context] - The logger passed into createInferBuilder differs from one already provided for an existing builder, runtime, or refitter. TensorRT maintains only a single logger pointer at any given time, so the existing value, which can be retrieved with getLogger(), will be used instead. In order to use a new logger, first destroy all existing builder, runner or refitter objects.\n",
+ "\n",
+ "WARNING: [Torch-TensorRT] - Dilation not used in Max pooling converter\n",
+ "WARNING: [Torch-TensorRT] - Dilation not used in Max pooling converter\n",
+ "WARNING: [Torch-TensorRT] - Detected invalid timing cache, setup a local cache instead\n",
+ "WARNING: [Torch-TensorRT] - Max value of this profile is not valid\n"
+ ]
+ }
+ ],
  "source": [
  "import torch_tensorrt\n",
  "\n",
@@ -451,10 +455,10 @@
  " max_shape=[1024, 1, 34, 34],\n",
  " dtype=torch.half\n",
  " )],\n",
- " \"enabled_precisions\": {torch.float, torch.half} # Run with FP16\n",
+ " \"enabled_precisions\": {torch.half} # Run with FP16\n",
  "}\n",
  "\n",
- "trt_ts_module = torch_tensorrt.compile(traced_model, compile_settings)\n",
+ "trt_ts_module = torch_tensorrt.compile(traced_model, **compile_settings)\n",
  "\n",
  "input_data = torch.randn((1024, 1, 32, 32))\n",
  "input_data = input_data.half().to(\"cuda\")\n",
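
Note on the source change in the hunk above (and in the matching script_model cell later in this diff): compile_settings is now unpacked with ** so that inputs and enabled_precisions reach torch_tensorrt.compile() as keyword arguments, and only torch.half is listed in enabled_precisions. For orientation, a minimal sketch of how the updated cell reads is below; the min_shape/opt_shape values and the traced_model variable are assumptions carried over from earlier cells that are not part of this diff.

    import torch
    import torch_tensorrt

    # Sketch only: `traced_model` is assumed to be the TorchScript-traced model
    # from earlier cells, and a CUDA device is assumed to be available.
    compile_settings = {
        "inputs": [torch_tensorrt.Input(
            min_shape=[1024, 1, 32, 32],   # assumed, not shown in this diff
            opt_shape=[1024, 1, 33, 33],   # assumed, not shown in this diff
            max_shape=[1024, 1, 34, 34],
            dtype=torch.half,
        )],
        "enabled_precisions": {torch.half}  # Run with FP16
    }

    # The settings dict must be keyword-expanded; passing it as a single
    # positional argument (the old form) does not match compile()'s signature.
    trt_ts_module = torch_tensorrt.compile(traced_model, **compile_settings)

    input_data = torch.randn((1024, 1, 32, 32)).half().to("cuda")
    result = trt_ts_module(input_data)
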
@@ -466,7 +470,7 @@
  },
  {
  "cell_type": "code",
- "execution_count": 12,
+ "execution_count": 20,
  "metadata": {},
  "outputs": [
  {
@@ -475,19 +479,19 @@
  "text": [
  "Warm up ...\n",
  "Start timing ...\n",
- "Iteration 100/1000, ave batch time 2.00 ms\n",
- "Iteration 200/1000, ave batch time 1.97 ms\n",
- "Iteration 300/1000, ave batch time 1.98 ms\n",
- "Iteration 400/1000, ave batch time 1.98 ms\n",
- "Iteration 500/1000, ave batch time 1.99 ms\n",
- "Iteration 600/1000, ave batch time 1.99 ms\n",
- "Iteration 700/1000, ave batch time 1.99 ms\n",
- "Iteration 800/1000, ave batch time 1.99 ms\n",
- "Iteration 900/1000, ave batch time 1.98 ms\n",
- "Iteration 1000/1000, ave batch time 1.98 ms\n",
+ "Iteration 100/1000, ave batch time 2.47 ms\n",
+ "Iteration 200/1000, ave batch time 2.40 ms\n",
+ "Iteration 300/1000, ave batch time 2.35 ms\n",
+ "Iteration 400/1000, ave batch time 2.35 ms\n",
+ "Iteration 500/1000, ave batch time 2.35 ms\n",
+ "Iteration 600/1000, ave batch time 2.35 ms\n",
+ "Iteration 700/1000, ave batch time 2.36 ms\n",
+ "Iteration 800/1000, ave batch time 2.35 ms\n",
+ "Iteration 900/1000, ave batch time 2.91 ms\n",
+ "Iteration 1000/1000, ave batch time 2.85 ms\n",
  "Input shape: torch.Size([1024, 1, 32, 32])\n",
  "Output features size: torch.Size([1024, 10])\n",
- "Average batch time: 1.98 ms\n"
+ "Average batch time: 2.85 ms\n"
  ]
  }
  ],
@@ -506,9 +510,29 @@
  },
  {
  "cell_type": "code",
- "execution_count": 13,
+ "execution_count": 21,
  "metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "WARNING: [Torch-TensorRT] - For input x.1, found user specified input dtype as Float16, however when inspecting the graph, the input type expected was inferred to be Float\n",
+ "The compiler is going to use the user setting Float16\n",
+ "This conflict may cause an error at runtime due to partial compilation being enabled and therefore\n",
+ "compatibility with PyTorch's data type convention is required.\n",
+ "If you do indeed see errors at runtime either:\n",
+ "- Remove the dtype spec for x.1\n",
+ "- Disable partial compilation by setting require_full_compilation to True\n",
+ "WARNING: [Torch-TensorRT TorchScript Conversion Context] - The logger passed into createInferBuilder differs from one already provided for an existing builder, runtime, or refitter. TensorRT maintains only a single logger pointer at any given time, so the existing value, which can be retrieved with getLogger(), will be used instead. In order to use a new logger, first destroy all existing builder, runner or refitter objects.\n",
+ "\n",
+ "WARNING: [Torch-TensorRT] - Dilation not used in Max pooling converter\n",
+ "WARNING: [Torch-TensorRT] - Dilation not used in Max pooling converter\n",
+ "WARNING: [Torch-TensorRT] - Detected invalid timing cache, setup a local cache instead\n",
+ "WARNING: [Torch-TensorRT] - Max value of this profile is not valid\n"
+ ]
+ }
+ ],
  "source": [
  "import torch_tensorrt\n",
  "\n",
@@ -520,10 +544,10 @@
  " max_shape=[1024, 1, 34, 34],\n",
  " dtype=torch.half\n",
  " )],\n",
- " \"enabled_precisions\": {torch.float, torch.half} # Run with FP16\n",
+ " \"enabled_precisions\": {torch.half} # Run with FP16\n",
  "}\n",
  "\n",
- "trt_script_module = torch_tensorrt.compile(script_model, compile_settings)\n",
+ "trt_script_module = torch_tensorrt.compile(script_model, **compile_settings)\n",
  "\n",
  "input_data = torch.randn((1024, 1, 32, 32))\n",
  "input_data = input_data.half().to(\"cuda\")\n",
@@ -535,7 +559,7 @@
  },
  {
  "cell_type": "code",
- "execution_count": 14,
+ "execution_count": 22,
  "metadata": {},
  "outputs": [
  {
@@ -544,19 +568,19 @@
  "text": [
  "Warm up ...\n",
  "Start timing ...\n",
- "Iteration 100/1000, ave batch time 2.02 ms\n",
- "Iteration 200/1000, ave batch time 1.98 ms\n",
- "Iteration 300/1000, ave batch time 1.97 ms\n",
- "Iteration 400/1000, ave batch time 1.96 ms\n",
- "Iteration 500/1000, ave batch time 1.96 ms\n",
- "Iteration 600/1000, ave batch time 1.96 ms\n",
- "Iteration 700/1000, ave batch time 1.96 ms\n",
- "Iteration 800/1000, ave batch time 1.96 ms\n",
- "Iteration 900/1000, ave batch time 1.96 ms\n",
- "Iteration 1000/1000, ave batch time 1.96 ms\n",
+ "Iteration 100/1000, ave batch time 2.34 ms\n",
+ "Iteration 200/1000, ave batch time 2.34 ms\n",
+ "Iteration 300/1000, ave batch time 2.35 ms\n",
+ "Iteration 400/1000, ave batch time 2.36 ms\n",
+ "Iteration 500/1000, ave batch time 2.58 ms\n",
+ "Iteration 600/1000, ave batch time 3.26 ms\n",
+ "Iteration 700/1000, ave batch time 3.13 ms\n",
+ "Iteration 800/1000, ave batch time 3.02 ms\n",
+ "Iteration 900/1000, ave batch time 2.96 ms\n",
+ "Iteration 1000/1000, ave batch time 2.90 ms\n",
  "Input shape: torch.Size([1024, 1, 32, 32])\n",
  "Output features size: torch.Size([1024, 10])\n",
- "Average batch time: 1.96 ms\n"
+ "Average batch time: 2.90 ms\n"
  ]
  }
  ],
@@ -579,7 +603,7 @@
  ],
  "metadata": {
  "kernelspec": {
- "display_name": "Python 3",
+ "display_name": "Python 3 (ipykernel)",
  "language": "python",
  "name": "python3"
  },
@@ -593,7 +617,7 @@
  "name": "python",
  "nbconvert_exporter": "python",
  "pygments_lexer": "ipython3",
- "version": "3.6.13"
+ "version": "3.8.10"
  }
  },
  "nbformat": 4,
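
The "Warm up ...", "Start timing ...", and "Iteration N/1000, ave batch time ..." lines in the outputs above come from a benchmarking cell that is outside this diff. As a reference point only, here is a minimal sketch of a timing loop that produces output in that format; the benchmark name, its parameters, and the use of time/numpy are assumptions, not taken from this diff.

    import time
    import numpy as np
    import torch

    def benchmark(model, input_shape=(1024, 1, 32, 32), dtype="fp32", nwarmup=50, nruns=1000):
        # Random input on the GPU, optionally cast to half precision.
        input_data = torch.randn(input_shape).to("cuda")
        if dtype == "fp16":
            input_data = input_data.half()

        print("Warm up ...")
        with torch.no_grad():
            for _ in range(nwarmup):
                features = model(input_data)
        torch.cuda.synchronize()

        print("Start timing ...")
        timings = []
        with torch.no_grad():
            for i in range(1, nruns + 1):
                start_time = time.time()
                features = model(input_data)
                torch.cuda.synchronize()  # wait for the GPU before reading the clock
                end_time = time.time()
                timings.append(end_time - start_time)
                if i % 100 == 0:
                    print("Iteration %d/%d, ave batch time %.2f ms" % (i, nruns, np.mean(timings) * 1000))

        print("Input shape:", input_data.size())
        print("Output features size:", features.size())
        print("Average batch time: %.2f ms" % (np.mean(timings) * 1000))
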