32 | 32 | "import tensorflow_hub as hub\n",
33 | 33 | "from datetime import datetime\n",
34 | 34 | "import requests\n",
35 |    | - "from copy import deepcopy\n",
36 | 35 | "print(\"We are using Tensorflow version: \", tf.__version__)"
37 | 36 | ]
38 | 37 | },

443 | 442 | "id": "8a03faef",
444 | 443 | "metadata": {},
445 | 444 | "source": [
446 |     | - "Let's measure the performance of the model we just saved using the `tf_benchmark.py` script that runs inference on dummy data."
    | 445 | + "Let's measure the performance of the model we just saved using the `tf_benchmark.py` script that runs inference on dummy data.\n",
    | 446 | + "\n",
    | 447 | + "_Note: We only use the auto-mixed precision policy if the underlying system is a 4th Gen Intel® Xeon® Scalable processor (code-named Sapphire Rapids)._"
    | 448 | + ]
    | 449 | + },
    | 450 | + {
    | 451 | + "cell_type": "code",
    | 452 | + "execution_count": null,
    | 453 | + "id": "db6aa4b4",
    | 454 | + "metadata": {},
    | 455 | + "outputs": [],
    | 456 | + "source": [
    | 457 | + "if arch == 'SPR':\n",
    | 458 | + "    PRECISION = \"bfloat16\"\n",
    | 459 | + "else:\n",
    | 460 | + "    PRECISION = \"float32\"\n",
    | 461 | + "print(\"Precision for inference: \", PRECISION)"
447 | 462 | ]
448 | 463 | },
449 | 464 | {
450 | 465 | "cell_type": "code",
451 | 466 | "execution_count": null,
452 | 467 | "id": "fd855747",
453 |     | - "metadata": {
454 |     | - "scrolled": false
455 |     | - },
    | 468 | + "metadata": {},
456 | 469 | "outputs": [],
457 | 470 | "source": [
458 |     | - "run scripts/tf_benchmark.py --model_path models/my_saved_model --num_warmup 5 --num_iter 50 --precision float32 --batch_size 32 --disable_optimize"
    | 471 | + "!python scripts/tf_benchmark.py --model_path models/my_saved_model --num_warmup 5 --num_iter 50 --precision $PRECISION --batch_size 32 --disable_optimize"
459 | 472 | ]
460 | 473 | },
461 | 474 | {
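The added cell above assumes an `arch` variable defined earlier in the notebook. As a hedged illustration (a hypothetical helper, not part of this change), such a flag could be derived on Linux by checking `/proc/cpuinfo` for the AMX-BF16 feature that Sapphire Rapids CPUs report; note also that the benchmark command passes the notebook's `PRECISION` variable into the shell line via IPython's `$`-interpolation.

```python
# Hypothetical sketch, not part of this PR: derive the `arch` flag the
# notebook expects by looking for the amx_bf16 CPU feature that 4th Gen
# Intel Xeon (Sapphire Rapids) processors expose on Linux.
def detect_arch() -> str:
    try:
        with open("/proc/cpuinfo") as f:
            cpuinfo = f.read()
    except OSError:  # non-Linux systems fall through to the generic label
        return "OTHER"
    return "SPR" if "amx_bf16" in cpuinfo else "OTHER"

arch = detect_arch()
print("Detected architecture:", arch)
```
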
486 | 499 | "metadata": {},
487 | 500 | "outputs": [],
488 | 501 | "source": [
489 |     | - "run scripts/freeze_optimize_v2.py --input_saved_model_dir=models/my_saved_model --output_saved_model_dir=models/my_optimized_model"
    | 502 | + "!python scripts/freeze_optimize_v2.py --input_saved_model_dir=models/my_saved_model --output_saved_model_dir=models/my_optimized_model"
490 | 503 | ]
491 | 504 | },
492 | 505 | {
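As an optional sanity check after the optimization pass (a minimal sketch using the standard TensorFlow SavedModel API and the output path from the cell above), the optimized model can be loaded back to confirm it exports a serving signature:

```python
# Load the optimized SavedModel written by freeze_optimize_v2.py and
# list its serving signatures; a usable export typically shows
# ['serving_default'].
import tensorflow as tf

model = tf.saved_model.load("models/my_optimized_model")
print(list(model.signatures.keys()))
```
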
501 | 514 | "cell_type": "code",
502 | 515 | "execution_count": null,
503 | 516 | "id": "480dddda",
504 |     | - "metadata": {
505 |     | - "scrolled": false
506 |     | - },
    | 517 | + "metadata": {},
507 | 518 | "outputs": [],
508 | 519 | "source": [
509 |     | - "run scripts/tf_benchmark.py --model_path models/my_optimized_model --num_warmup 5 --num_iter 50 --precision float32 --batch_size 32"
    | 520 | + "!python scripts/tf_benchmark.py --model_path models/my_optimized_model --num_warmup 5 --num_iter 50 --precision $PRECISION --batch_size 32"
510 | 521 | ]
511 | 522 | },
512 | 523 | {

526 | 537 | "metadata": {},
527 | 538 | "outputs": [],
528 | 539 | "source": [
529 |     | - "run scripts/plot.py"
530 |     | - ]
531 |     | - },
532 |     | - {
533 |     | - "cell_type": "markdown",
534 |     | - "id": "8157a5ec",
535 |     | - "metadata": {},
536 |     | - "source": [
537 |     | - "### TensorFlow Serving\n",
538 |     | - "\n",
539 |     | - "In this section, we will initialize and run TensorFlow Serving natively to serve our retrained model."
540 |     | - ]
541 |     | - },
542 |     | - {
543 |     | - "cell_type": "code",
544 |     | - "execution_count": null,
545 |     | - "id": "6a00c32d",
546 |     | - "metadata": {},
547 |     | - "outputs": [],
548 |     | - "source": [
549 |     | - "!mkdir serving\n",
550 |     | - "!cp -r models/my_optimized_model serving/1"
551 |     | - ]
552 |     | - },
553 |     | - {
554 |     | - "cell_type": "code",
555 |     | - "execution_count": null,
556 |     | - "id": "a45b5438",
557 |     | - "metadata": {},
558 |     | - "outputs": [],
559 |     | - "source": [
560 |     | - "os.environ[\"MODEL_DIR\"] = os.getcwd() + \"/serving\""
561 |     | - ]
562 |     | - },
563 |     | - {
564 |     | - "cell_type": "markdown",
565 |     | - "id": "edcd77c4",
566 |     | - "metadata": {},
567 |     | - "source": [
568 |     | - "This is where we start running TensorFlow Serving and load our model. After it loads we can start making inference requests using REST. There are some important parameters:\n",
569 |     | - "- **rest_api_port**: The port that you'll use for REST requests.\n",
570 |     | - "- **model_name**: You'll use this in the URL of REST requests. It can be anything.\n",
571 |     | - "- **model_base_path**: This is the path to the directory where you've saved your model."
572 |     | - ]
573 |     | - },
574 |     | - {
575 |     | - "cell_type": "code",
576 |     | - "execution_count": null,
577 |     | - "id": "34aee14f",
578 |     | - "metadata": {},
579 |     | - "outputs": [],
580 |     | - "source": [
581 |     | - "%%bash --bg\n",
582 |     | - "nohup tensorflow_model_server --rest_api_port=8501 --model_name=rn50 --model_base_path=${MODEL_DIR} > server.log 2>&1"
583 |     | - ]
584 |     | - },
585 |     | - {
586 |     | - "cell_type": "code",
587 |     | - "execution_count": null,
588 |     | - "id": "e486894a",
589 |     | - "metadata": {},
590 |     | - "outputs": [],
591 |     | - "source": [
592 |     | - "!tail server.log"
593 |     | - ]
594 |     | - },
595 |     | - {
596 |     | - "cell_type": "markdown",
597 |     | - "id": "7dc7606d",
598 |     | - "metadata": {},
599 |     | - "source": [
600 |     | - "**Prepare the testing data for prediction**"
    | 540 | + "!python scripts/plot.py"
601 | 541 | ]
602 | 542 | },
603 | 543 | {
604 | 544 | "cell_type": "code",
605 | 545 | "execution_count": null,
606 |     | - "id": "c9dfa9d8",
    | 546 | + "id": "7c1bd119-ffc1-4761-a614-c2ffd83e6b4c",
607 | 547 | "metadata": {},
608 | 548 | "outputs": [],
609 |     | - "source": [
610 |     | - "for image_batch, labels_batch in val_ds:\n",
611 |     | - "    print(image_batch.shape)\n",
612 |     | - "    print(labels_batch.shape)\n",
613 |     | - "    break\n",
614 |     | - "test_data, test_labels = image_batch.numpy(), labels_batch.numpy()"
615 |     | - ]
616 |     | - },
617 |     | - {
618 |     | - "cell_type": "markdown",
619 |     | - "id": "5d4e5f62",
620 |     | - "metadata": {},
621 |     | - "source": [
622 |     | - "First, let's take a look at a random example from our test data."
623 |     | - ]
624 |     | - },
625 |     | - {
626 |     | - "cell_type": "code",
627 |     | - "execution_count": null,
628 |     | - "id": "e2761dcf",
629 |     | - "metadata": {},
630 |     | - "outputs": [],
631 |     | - "source": [
632 |     | - "import matplotlib.pyplot as plt\n",
633 |     | - "\n",
634 |     | - "def show(idx, title):\n",
635 |     | - "    plt.figure()\n",
636 |     | - "    plt.imshow(test_data[idx])\n",
637 |     | - "    plt.axis('off')\n",
638 |     | - "    plt.title('\\n\\n{}'.format(title), fontdict={'size': 16})\n",
639 |     | - "\n",
640 |     | - "import random\n",
641 |     | - "rando = random.randint(0,test_data.shape[0]-1)\n",
642 |     | - "show(rando, 'An Example Image:')"
643 |     | - ]
644 |     | - },
645 |     | - {
646 |     | - "cell_type": "markdown",
647 |     | - "id": "3b362658",
648 |     | - "metadata": {},
649 |     | - "source": [
650 |     | - "#### Make a request to your model in TensorFlow Serving\n",
651 |     | - "\n",
652 |     | - "Now let's create the JSON object for a batch of three inference requests, and see how well our model recognizes things:"
653 |     | - ]
654 |     | - },
655 |     | - {
656 |     | - "cell_type": "code",
657 |     | - "execution_count": null,
658 |     | - "id": "831bf2d1",
659 |     | - "metadata": {
660 |     | - "scrolled": true
661 |     | - },
662 |     | - "outputs": [],
663 |     | - "source": [
664 |     | - "import json\n",
665 |     | - "data = json.dumps({\"signature_name\": \"serving_default\", \"instances\": test_data[0:3].tolist()})\n",
666 |     | - "print('Data: {} ... {}'.format(data[:50], data[len(data)-52:]))"
667 |     | - ]
668 |     | - },
669 |     | - {
670 |     | - "cell_type": "markdown",
671 |     | - "id": "427f3c8b",
672 |     | - "metadata": {},
673 |     | - "source": [
674 |     | - "#### Make REST requests\n",
675 |     | - "\n",
676 |     | - "We'll send a predict request as a POST to our server's REST endpoint, and pass it three examples."
677 |     | - ]
678 |     | - },
679 |     | - {
680 |     | - "cell_type": "code",
681 |     | - "execution_count": null,
682 |     | - "id": "3d7f5e5e",
683 |     | - "metadata": {},
684 |     | - "outputs": [],
685 |     | - "source": [
686 |     | - "headers = {\"content-type\": \"application/json\"}\n",
687 |     | - "json_response = requests.post('http://localhost:8501/v1/models/rn50:predict', data=data, headers=headers)\n",
688 |     | - "predictions = json.loads(json_response.text)['predictions']\n",
689 |     | - "\n",
690 |     | - "for i in range(0,3):\n",
691 |     | - "    show(i, 'The model thought this was a {} (class {}), and it was actually a {} (class {})'.format(\n",
692 |     | - "        class_names[np.argmax(predictions[i])], np.argmax(predictions[i]), class_names[test_labels[i]], test_labels[i]))"
693 |     | - ]
    | 549 | + "source": []
694 | 550 | }
695 | 551 | ],
696 | 552 | "metadata": {
    | 553 | + "kernelspec": {
    | 554 | + "display_name": "Python 3 (ipykernel)",
    | 555 | + "language": "python",
    | 556 | + "name": "python3"
    | 557 | + },
697 | 558 | "language_info": {
698 | 559 | "codemirror_mode": {
699 | 560 | "name": "ipython",

704 | 565 | "name": "python",
705 | 566 | "nbconvert_exporter": "python",
706 | 567 | "pygments_lexer": "ipython3",
707 |     | - "version": "3.8.12"
    | 568 | + "version": "3.10.12"
708 | 569 | }
709 | 570 | },
710 | 571 | "nbformat": 4,