
Commit 7d9f8ee

Merge pull request #2432 from othakkar/othakkar/remove_tf_serving_dev

[development] Remove TF Serving

2 parents aa8c9b7 + 9ceed9f

4 files changed: +100 -178 lines changed

AI-and-Analytics/Features-and-Functionality/IntelTensorFlow_Enabling_Auto_Mixed_Precision_for_TransferLearning/README.md

Lines changed: 65 additions & 5 deletions
@@ -2,7 +2,7 @@
 
 The `Enable Auto-Mixed Precision for Transfer Learning with TensorFlow*` sample guides you through the process of enabling auto-mixed precision to use low-precision datatypes, like bfloat16, for transfer learning with TensorFlow* (TF).
 
-The sample demonstrates the end-to-end pipeline tasks typically performed in a deep learning use-case: training (and retraining), inference optimization, and serving the model with TensorFlow Serving.
+The sample demonstrates the tasks typically performed in a deep learning use-case: training (and retraining), and inference optimization. The sample also includes tips and boilerplate code for serving the model with TensorFlow Serving.
 
 | Area | Description
 |:--- |:---
@@ -37,10 +37,6 @@ You will need to download and install the following toolkits, tools, and components
 
   Install using PIP: `$pip install notebook`. <br> Alternatively, see [*Installing Jupyter*](https://jupyter.org/install) for detailed installation instructions.
 
-- **TensorFlow Serving**
-
-  See *TensorFlow Serving* [*Installation*](https://www.tensorflow.org/tfx/serving/setup) for detailed installation options.
-
 - **Other dependencies**
 
   Install using PIP and the `requirements.txt` file supplied with the sample: `$pip install -r requirements.txt --no-deps`. <br> The `requirements.txt` file contains the necessary dependencies to run the Notebook.
@@ -112,6 +108,70 @@ You will see diagrams comparing performance and analysis. This includes performa
 
 For performance analysis, you will see histograms showing different Tensorflow* operations in the analyzed pre-trained model pb file.
 
+## Serve the model with TensorFlow Serving
+
+### Installation
+See *TensorFlow Serving* [*Installation*](https://www.tensorflow.org/tfx/serving/setup) for detailed installation options.
+
+### Example Code
+
+Create a copy of the optimized model in a well-defined directory hierarchy with a version number "1".
+
+```
+!mkdir serving
+!cp -r models/my_optimized_model serving/1
+```
+
+```
+os.environ["MODEL_DIR"] = os.getcwd() + "/serving"
+```
+
+This is where we start running TensorFlow Serving and load our model. After it loads we can start making inference requests using REST. There are some important parameters:
+- **rest_api_port**: The port that you'll use for REST requests.
+- **model_name**: You'll use this in the URL of REST requests. It can be anything.
+- **model_base_path**: This is the path to the directory where you've saved your model.
+
+```
+%%bash --bg
+nohup tensorflow_model_server --rest_api_port=8501 --model_name=rn50 --model_base_path=${MODEL_DIR} > server.log 2>&1
+```
+
+#### Prepare the testing data for prediction
+
+```
+for image_batch, labels_batch in val_ds:
+    print(image_batch.shape)
+    print(labels_batch.shape)
+    break
+test_data, test_labels = image_batch.numpy(), labels_batch.numpy()
+```
+
+#### Make REST requests
+
+Now let's create the JSON object for a batch of three inference requests and we'll send a predict request as a POST to our server's REST endpoint, and pass it three examples.
+
+```
+import json
+import matplotlib.pyplot as plt
+
+def show(idx, title):
+    plt.figure()
+    plt.imshow(test_data[idx])
+    plt.axis('off')
+    plt.title('\n\n{}'.format(title), fontdict={'size': 16})
+
+data = json.dumps({"signature_name": "serving_default", "instances": test_data[0:3].tolist()})
+print('Data: {} ... {}'.format(data[:50], data[len(data)-52:]))
+
+headers = {"content-type": "application/json"}
+json_response = requests.post('http://localhost:8501/v1/models/rn50:predict', data=data, headers=headers)
+predictions = json.loads(json_response.text)['predictions']
+
+for i in range(0,3):
+    show(i, 'The model thought this was a {} (class {}), and it was actually a {} (class {})'.format(
+        class_names[np.argmax(predictions[i])], np.argmax(predictions[i]), class_names[test_labels[i]], test_labels[i]))
+```
+
 ## License
 
 Code samples are licensed under the MIT license. See
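
The serving walkthrough added above launches `tensorflow_model_server` in the background and then sends predict requests. A quick way to confirm the server has actually loaded the model before predicting is to query TensorFlow Serving's model status endpoint; the sketch below assumes the same port (8501) and model name (`rn50`) used in the snippets above.

```python
import requests

# Query the model status endpoint exposed by TensorFlow Serving.
# A healthy server reports the served version with state "AVAILABLE".
status = requests.get("http://localhost:8501/v1/models/rn50")
print(status.json())
```

If the request fails or the reported state is not "AVAILABLE", the `server.log` file written by the `nohup` command above is the first place to look.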

AI-and-Analytics/Features-and-Functionality/IntelTensorFlow_Enabling_Auto_Mixed_Precision_for_TransferLearning/enabling_automixed_precision_for_transfer_learning_with_tensorflow.ipynb

Lines changed: 31 additions & 170 deletions
@@ -32,7 +32,6 @@
    "import tensorflow_hub as hub\n",
    "from datetime import datetime\n",
    "import requests\n",
-    "from copy import deepcopy\n",
    "print(\"We are using Tensorflow version: \", tf.__version__)"
   ]
  },
@@ -443,19 +442,33 @@
   "id": "8a03faef",
   "metadata": {},
   "source": [
-    "Let's measure the performance of the model we just saved using the `tf_benchmark.py` script that runs inference on dummy data."
+    "Let's measure the performance of the model we just saved using the `tf_benchmark.py` script that runs inference on dummy data.\n",
+    "\n",
+    "_Note: We only use the auto-mixed precision policy if the underlying system is the 4th Gen Intel® Xeon® scalable processor (codenamed Sapphire Rapids)_"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "db6aa4b4",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "if arch == 'SPR':\n",
+    "    PRECISION = \"bfloat16\"\n",
+    "else:\n",
+    "    PRECISION = \"float32\"\n",
+    "print(\"Precision for inference: \", PRECISION)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fd855747",
-   "metadata": {
-    "scrolled": false
-   },
+   "metadata": {},
   "outputs": [],
   "source": [
-    "run scripts/tf_benchmark.py --model_path models/my_saved_model --num_warmup 5 --num_iter 50 --precision float32 --batch_size 32 --disable_optimize"
+    "!python scripts/tf_benchmark.py --model_path models/my_saved_model --num_warmup 5 --num_iter 50 --precision PRECISION --batch_size 32 --disable_optimize"
   ]
  },
  {
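
The new `db6aa4b4` cell branches on an `arch` value that is defined elsewhere in the notebook. Given that the updated requirements pin `py-cpuinfo`, one plausible way to derive it is from CPU feature flags; the sketch below is an illustrative assumption (the flag name and the `'SPR'` label mapping are guesses), not code from this notebook.

```python
import cpuinfo

# Assumed detection: treat a CPU that advertises AMX bfloat16 support
# (a 4th Gen Xeon / Sapphire Rapids feature) as "SPR"; otherwise keep float32.
flags = cpuinfo.get_cpu_info().get("flags", [])
arch = "SPR" if "amx_bf16" in flags else "OTHER"
PRECISION = "bfloat16" if arch == "SPR" else "float32"
print("Precision for inference: ", PRECISION)
```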
@@ -486,7 +499,7 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "run scripts/freeze_optimize_v2.py --input_saved_model_dir=models/my_saved_model --output_saved_model_dir=models/my_optimized_model"
+    "!python scripts/freeze_optimize_v2.py --input_saved_model_dir=models/my_saved_model --output_saved_model_dir=models/my_optimized_model"
   ]
  },
  {
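
After `freeze_optimize_v2.py` writes `models/my_optimized_model`, a simple sanity check is to reload the SavedModel and confirm it still exposes a serving signature. This uses standard TensorFlow API and is shown only as an illustrative aside, not a cell from this notebook.

```python
import tensorflow as tf

# Reload the optimized SavedModel and list its signatures;
# a servable model should expose 'serving_default'.
loaded = tf.saved_model.load("models/my_optimized_model")
print(list(loaded.signatures.keys()))
```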
@@ -501,12 +514,10 @@
   "cell_type": "code",
   "execution_count": null,
   "id": "480dddda",
-   "metadata": {
-    "scrolled": false
-   },
+   "metadata": {},
   "outputs": [],
   "source": [
-    "run scripts/tf_benchmark.py --model_path models/my_optimized_model --num_warmup 5 --num_iter 50 --precision float32 --batch_size 32"
+    "!python scripts/tf_benchmark.py --model_path models/my_optimized_model --num_warmup 5 --num_iter 50 --precision PRECISION --batch_size 32"
   ]
  },
  {
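
Both rewritten benchmark cells pass `--precision PRECISION` to a `!python` command. For the value of the Python `PRECISION` variable to reach the script, IPython has to expand it, which it only does for `{var}` or `$var` references inside `!` commands; the cell below is a minimal sketch of that interpolation pattern, an assumption about the intended behavior rather than a line from this notebook.

```
PRECISION = "bfloat16"
# Braces ask IPython to substitute the Python variable into the shell command.
!python scripts/tf_benchmark.py --model_path models/my_optimized_model --num_warmup 5 --num_iter 50 --precision {PRECISION} --batch_size 32
```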
@@ -526,174 +537,24 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "run scripts/plot.py"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "8157a5ec",
-   "metadata": {},
-   "source": [
-    "### TensorFlow Serving\n",
-    "\n",
-    "In this section, we will initialize and run TensorFlow Serving natively to serve our retrained model."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "6a00c32d",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "!mkdir serving\n",
-    "!cp -r models/my_optimized_model serving/1"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "a45b5438",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "os.environ[\"MODEL_DIR\"] = os.getcwd() + \"/serving\""
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "edcd77c4",
-   "metadata": {},
-   "source": [
-    "This is where we start running TensorFlow Serving and load our model. After it loads we can start making inference requests using REST. There are some important parameters:\n",
-    "- **rest_api_port**: The port that you'll use for REST requests.\n",
-    "- **model_name**: You'll use this in the URL of REST requests. It can be anything.\n",
-    "- **model_base_path**: This is the path to the directory where you've saved your model."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "34aee14f",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "%%bash --bg\n",
-    "nohup tensorflow_model_server --rest_api_port=8501 --model_name=rn50 --model_base_path=${MODEL_DIR} > server.log 2>&1"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "e486894a",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "!tail server.log"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "7dc7606d",
-   "metadata": {},
-   "source": [
-    "**Prepare the testing data for prediction**"
+    "!python scripts/plot.py"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
-   "id": "c9dfa9d8",
+   "id": "7c1bd119-ffc1-4761-a614-c2ffd83e6b4c",
   "metadata": {},
   "outputs": [],
-   "source": [
-    "for image_batch, labels_batch in val_ds:\n",
-    "    print(image_batch.shape)\n",
-    "    print(labels_batch.shape)\n",
-    "    break\n",
-    "test_data, test_labels = image_batch.numpy(), labels_batch.numpy()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "5d4e5f62",
-   "metadata": {},
-   "source": [
-    "First, let's take a look at a random example from our test data."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "e2761dcf",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import matplotlib.pyplot as plt\n",
-    "\n",
-    "def show(idx, title):\n",
-    "    plt.figure()\n",
-    "    plt.imshow(test_data[idx])\n",
-    "    plt.axis('off')\n",
-    "    plt.title('\\n\\n{}'.format(title), fontdict={'size': 16})\n",
-    "\n",
-    "import random\n",
-    "rando = random.randint(0,test_data.shape[0]-1)\n",
-    "show(rando, 'An Example Image:')"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "3b362658",
-   "metadata": {},
-   "source": [
-    "#### Make a request to your model in TensorFlow Serving\n",
-    "\n",
-    "Now let's create the JSON object for a batch of three inference requests, and see how well our model recognizes things:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "831bf2d1",
-   "metadata": {
-    "scrolled": true
-   },
-   "outputs": [],
-   "source": [
-    "import json\n",
-    "data = json.dumps({\"signature_name\": \"serving_default\", \"instances\": test_data[0:3].tolist()})\n",
-    "print('Data: {} ... {}'.format(data[:50], data[len(data)-52:]))"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "427f3c8b",
-   "metadata": {},
-   "source": [
-    "#### Make REST requests\n",
-    "\n",
-    "We'll send a predict request as a POST to our server's REST endpoint, and pass it three examples."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "3d7f5e5e",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "headers = {\"content-type\": \"application/json\"}\n",
-    "json_response = requests.post('http://localhost:8501/v1/models/rn50:predict', data=data, headers=headers)\n",
-    "predictions = json.loads(json_response.text)['predictions']\n",
-    "\n",
-    "for i in range(0,3):\n",
-    "    show(i, 'The model thought this was a {} (class {}), and it was actually a {} (class {})'.format(\n",
-    "        class_names[np.argmax(predictions[i])], np.argmax(predictions[i]), class_names[test_labels[i]], test_labels[i]))"
-   ]
+   "source": []
  }
 ],
 "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
@@ -704,7 +565,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.8.12"
+   "version": "3.10.12"
  }
 },
 "nbformat": 4,
AI-and-Analytics/Features-and-Functionality/IntelTensorFlow_Enabling_Auto_Mixed_Precision_for_TransferLearning/requirements.txt

Lines changed: 3 additions & 2 deletions

@@ -1,4 +1,5 @@
-notebook
+neural_compressor==2.4.1
 Pillow
-tensorflow_hub
+py-cpuinfo
 requests
+tensorflow_hub==0.16.0

AI-and-Analytics/Features-and-Functionality/IntelTensorFlow_Enabling_Auto_Mixed_Precision_for_TransferLearning/scripts/tf_benchmark.py

Lines changed: 1 addition & 1 deletion
@@ -190,7 +190,7 @@ def run_benchmark(model_details, args, find_graph_def):
     throughput = 1.0 / avg_time * args.batch_size
     print('Batch size = %d' % args.batch_size)
     print("Latency: {:.3f} ms".format(latency))
-    print("Throughput: {:.2f} fps".format(throughput))
+    print("Throughput: {:.2f} images per sec".format(throughput))
 
     # Logging to a file
     log_file = open("log.txt", "a")
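
The relabeled metric comes from `throughput = 1.0 / avg_time * args.batch_size`, so the unit is images per second rather than frames. A short worked example with illustrative numbers:

```python
batch_size = 32
avg_time = 0.020  # seconds per batch, illustrative value only

# 1 / 0.020 = 50 batches per second; 50 * 32 = 1600 images per second.
throughput = 1.0 / avg_time * batch_size
print("Throughput: {:.2f} images per sec".format(throughput))  # 1600.00
```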
