Commit bafe829

SinaChavoshi authored and Tensorflow Cloud maintainers committed
Add sample for distributed training with MWMS.
PiperOrigin-RevId: 356563430
1 parent 3c76270 commit bafe829

File tree

4 files changed: +382 -6 lines changed
src/python/tensorflow_cloud/examples/distributed_training_nasnet_with_tensorflow_cloud.ipynb

Lines changed: 376 additions & 0 deletions
@@ -0,0 +1,376 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "PRgkg_PlTviL"
   },
   "source": [
    "# Distributed training of NasNet with tensorflow_cloud and Google Cloud\n",
    "\n",
    "This example is based on [Image classification via fine-tuning with EfficientNet](https://keras.io/examples/vision/image_classification_efficientnet_fine_tuning/) and demonstrates how to train a [NasNetMobile](https://keras.io/api/applications/nasnet/#nasnetmobile-function) model at scale with distributed training, using [tensorflow_cloud](https://github.com/tensorflow/cloud) and Google Cloud Platform.\n",
    "\n",
    "\u003ctable align=\"left\"\u003e\n",
    " \u003ctd\u003e\n",
    "  \u003ca href=\"https://colab.research.google.com/github/tensorflow/cloud/blob/master/src/python/tensorflow_cloud/examples/distributed_training_nasnet_with_tensorflow_cloud.ipynb\"\u003e\n",
    "   \u003cimg width=\"50\" src=\"https://cloud.google.com/ml-engine/images/colab-logo-32px.png\" alt=\"Colab logo\"\u003eRun in Colab\n",
    "  \u003c/a\u003e\n",
    " \u003c/td\u003e\n",
    " \u003ctd\u003e\n",
    "  \u003ca href=\"https://github.com/tensorflow/cloud/blob/master/src/python/tensorflow_cloud/examples/distributed_training_nasnet_with_tensorflow_cloud.ipynb\"\u003e\n",
    "   \u003cimg src=\"https://cloud.google.com/ml-engine/images/github-logo-32px.png\" alt=\"GitHub logo\"\u003eView on GitHub\n",
    "  \u003c/a\u003e\n",
    " \u003c/td\u003e\n",
    " \u003ctd\u003e\n",
    "  \u003ca href=\"https://www.kaggle.com/nitric/distributed-training-nasnet-with-tensorflow-cloud\"\u003e\n",
    "   \u003cimg width=\"90\" src=\"https://www.kaggle.com/static/images/site-logo.png\" alt=\"Kaggle logo\"\u003eRun in Kaggle\n",
    "  \u003c/a\u003e\n",
    " \u003c/td\u003e\n",
    "\u003c/table\u003e"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "kA1D8jB3TviQ",
    "trusted": true
   },
   "outputs": [],
   "source": [
    "import os\n",
    "import sys\n",
    "import tensorflow as tf\n",
    "import tensorflow_cloud as tfc"
   ]
  },
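  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Optional sanity check (not part of the original example): print the\n",
    "# TensorFlow and tensorflow_cloud versions available in this runtime. This\n",
    "# can help when debugging mismatches between the notebook and the remote job.\n",
    "print('TensorFlow version:', tf.__version__)\n",
    "print('tensorflow_cloud version:', getattr(tfc, '__version__', 'unknown'))"
   ]
  },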
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "vdLn2dl2TviR"
   },
   "source": [
    "Set project parameters. For Google Cloud-specific parameters, refer to [Google Cloud Project Setup Instructions](https://www.kaggle.com/nitric/google-cloud-project-setup-instructions/)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "b2Ev1lz-TviR",
    "trusted": true
   },
   "outputs": [],
   "source": [
    "# Set Google Cloud-specific parameters\n",
    "\n",
    "# TODO: Please set GCP_PROJECT_ID to your own Google Cloud project ID.\n",
    "GCP_PROJECT_ID = 'YOUR_PROJECT_ID' #@param {type:\"string\"}\n",
    "\n",
    "# TODO: Set GCS_BUCKET to your own Google Cloud Storage (GCS) bucket.\n",
    "GCS_BUCKET = 'YOUR_GCS_BUCKET_NAME' #@param {type:\"string\"}\n",
    "\n",
    "# DO NOT CHANGE: Currently only the 'us-central1' region is supported.\n",
    "REGION = 'us-central1'\n",
    "\n",
    "# OPTIONAL: You can change the job name to any string.\n",
    "JOB_NAME = 'nasnet' #@param {type:\"string\"}\n",
    "\n",
    "# Setting the location where training logs and checkpoints will be stored\n",
    "GCS_BASE_PATH = f'gs://{GCS_BUCKET}/{JOB_NAME}'\n",
    "TENSORBOARD_LOGS_DIR = os.path.join(GCS_BASE_PATH, \"logs\")\n",
    "MODEL_CHECKPOINT_DIR = os.path.join(GCS_BASE_PATH, \"checkpoints\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "KQ4B0XjaTviR"
   },
   "source": [
    "## Authenticating the notebook to use your Google Cloud Project\n",
    "\n",
    "For Kaggle Notebooks, click on \"Add-ons\" -\u003e \"Google Cloud SDK\" before running the cell below."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "vZp9qc3STviS",
    "trusted": true
   },
   "outputs": [],
   "source": [
    "# Using tfc.remote() to ensure this code only runs in the notebook\n",
    "if not tfc.remote():\n",
    "\n",
    "    # Authentication for Kaggle Notebooks\n",
    "    if \"kaggle_secrets\" in sys.modules:\n",
    "        from kaggle_secrets import UserSecretsClient\n",
    "        UserSecretsClient().set_gcloud_credentials(project=GCP_PROJECT_ID)\n",
    "\n",
    "    # Authentication for Colab Notebooks\n",
    "    if \"google.colab\" in sys.modules:\n",
    "        from google.colab import auth\n",
    "        auth.authenticate_user()"
   ]
  },
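  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Optional sketch (not part of the original example): verify that the GCS\n",
    "# bucket set above is reachable before submitting a remote job. This assumes\n",
    "# the google-cloud-storage client library is available in the notebook\n",
    "# runtime (it is preinstalled on Colab and Kaggle).\n",
    "if not tfc.remote():\n",
    "    try:\n",
    "        from google.cloud import storage\n",
    "        bucket = storage.Client(project=GCP_PROJECT_ID).lookup_bucket(GCS_BUCKET)\n",
    "        if bucket is None:\n",
    "            print(f'Bucket gs://{GCS_BUCKET} was not found; please create it first.')\n",
    "        else:\n",
    "            print(f'Bucket gs://{GCS_BUCKET} is accessible.')\n",
    "    except Exception as e:  # Best-effort check only\n",
    "        print('Could not verify the bucket:', e)"
   ]
  },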
119+
{
120+
"cell_type": "markdown",
121+
"metadata": {
122+
"id": "4Jix595FTviS"
123+
},
124+
"source": [
125+
"## Load and prepare data\n",
126+
"Read raw data and split to train and test data sets."
127+
]
128+
},
129+
{
130+
"cell_type": "code",
131+
"execution_count": null,
132+
"metadata": {
133+
"id": "5xEWEh2fTviS",
134+
"trusted": true
135+
},
136+
"outputs": [],
137+
"source": [
138+
"(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()\n",
139+
"\n",
140+
"# Setting input specific parameters\n",
141+
"# The model expects input of dimetions of (INPUT_IMG_SIZE, INPUT_IMG_SIZE, 3)\n",
142+
"INPUT_IMG_SIZE = 32\n",
143+
"NUM_CLASSES = 10"
144+
]
145+
},
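  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Optional sanity check (not part of the original example): confirm the\n",
    "# CIFAR-10 arrays have the expected shapes and label range before training.\n",
    "print('x_train shape:', x_train.shape)  # expected (50000, 32, 32, 3)\n",
    "print('y_train shape:', y_train.shape)  # expected (50000, 1)\n",
    "print('x_test shape:', x_test.shape)    # expected (10000, 32, 32, 3)\n",
    "print('labels range from', y_train.min(), 'to', y_train.max())"
   ]
  },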
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "69fNjNqWTviT"
   },
   "source": [
    "Add preprocessing layers for image augmentation using the Keras preprocessing layers API."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "kstHXHtoTviT",
    "trusted": true
   },
   "outputs": [],
   "source": [
    "from tensorflow.keras.layers.experimental import preprocessing\n",
    "from tensorflow.keras.models import Sequential\n",
    "\n",
    "\n",
    "img_augmentation = Sequential(\n",
    "    [\n",
    "        # Resizing input to better match ImageNet size\n",
    "        preprocessing.Resizing(256, 256),\n",
    "        preprocessing.RandomRotation(factor=0.15),\n",
    "        preprocessing.RandomFlip(),\n",
    "        preprocessing.RandomContrast(factor=0.1),\n",
    "    ],\n",
    "    name=\"img_augmentation\",\n",
    ")"
   ]
  },
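  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Optional sketch (not part of the original example): preview what the\n",
    "# augmentation pipeline produces for a single training image. matplotlib is\n",
    "# assumed to be available in the notebook runtime.\n",
    "if not tfc.remote():\n",
    "    import numpy as np\n",
    "    import matplotlib.pyplot as plt\n",
    "\n",
    "    sample = tf.expand_dims(tf.cast(x_train[0], tf.float32), axis=0)\n",
    "    augmented = img_augmentation(sample, training=True)\n",
    "    plt.imshow(np.clip(augmented[0].numpy(), 0, 255).astype('uint8'))\n",
    "    plt.title('Augmented sample')\n",
    "    plt.axis('off')\n",
    "    plt.show()"
   ]
  },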
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "QkYgwEBgTviU"
   },
   "source": [
    "## Load the model and prepare for training\n",
    "We will load a pretrained NASNetMobile model (with ImageNet weights) and unfreeze a few layers to fine-tune the model to better match the dataset."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "NhL5g2YoTviU",
    "trusted": true
   },
   "outputs": [],
   "source": [
    "from tensorflow.keras import layers\n",
    "\n",
    "def build_model(num_classes, input_image_size):\n",
    "    inputs = layers.Input(shape=(input_image_size, input_image_size, 3))\n",
    "    x = img_augmentation(inputs)\n",
    "\n",
    "    model = tf.keras.applications.NASNetMobile(\n",
    "        input_shape=None,\n",
    "        include_top=False,\n",
    "        weights=\"imagenet\",\n",
    "        input_tensor=x,\n",
    "        pooling=None,\n",
    "        classes=num_classes,\n",
    "    )\n",
    "\n",
    "    # Freeze the pretrained weights\n",
    "    model.trainable = False\n",
    "\n",
    "    # We unfreeze the top 20 layers while leaving BatchNorm layers frozen\n",
    "    for layer in model.layers[-20:]:\n",
    "        if not isinstance(layer, layers.BatchNormalization):\n",
    "            layer.trainable = True\n",
    "\n",
    "    # Rebuild top\n",
    "    x = layers.GlobalAveragePooling2D(name=\"avg_pool\")(model.output)\n",
    "    x = layers.BatchNormalization()(x)\n",
    "\n",
    "    x = layers.Dense(128, activation=\"relu\")(x)\n",
    "    x = layers.Dense(64, activation=\"relu\")(x)\n",
    "    outputs = layers.Dense(num_classes, activation=\"softmax\", name=\"pred\")(x)\n",
    "\n",
    "    # Compile\n",
    "    model = tf.keras.Model(inputs, outputs, name=\"NASNetMobile\")\n",
    "    optimizer = tf.keras.optimizers.Adam(learning_rate=3e-4)\n",
    "    model.compile(\n",
    "        optimizer=optimizer,\n",
    "        loss=\"sparse_categorical_crossentropy\",\n",
    "        metrics=[\"accuracy\"]\n",
    "    )\n",
    "    return model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "jQwNarnJTviU",
    "trusted": true
   },
   "outputs": [],
   "source": [
    "model = build_model(NUM_CLASSES, INPUT_IMG_SIZE)\n",
    "\n",
    "if tfc.remote():\n",
    "    # Configure TensorBoard logs\n",
    "    callbacks = [\n",
    "        tf.keras.callbacks.TensorBoard(log_dir=TENSORBOARD_LOGS_DIR),\n",
    "        tf.keras.callbacks.ModelCheckpoint(\n",
    "            MODEL_CHECKPOINT_DIR,\n",
    "            save_best_only=True),\n",
    "        tf.keras.callbacks.EarlyStopping(\n",
    "            monitor='loss',\n",
    "            min_delta=0.001,\n",
    "            patience=3)]\n",
    "\n",
    "    model.fit(x=x_train, y=y_train, epochs=100,\n",
    "              validation_split=0.2, callbacks=callbacks)\n",
    "\n",
    "else:\n",
    "    # Run the training for 1 epoch and a small subset of the data to validate setup\n",
    "    model.fit(x=x_train[:100], y=y_train[:100], validation_split=0.2, epochs=1)"
   ]
  },
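  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Note (an illustrative sketch, not part of the original example): with\n",
    "# distribution_strategy='auto' and worker_count \u003e 0 in tfc.run() below, the\n",
    "# remote job is expected to train with MultiWorkerMirroredStrategy, so the\n",
    "# batch size passed to model.fit() is the global batch size split across all\n",
    "# workers. The numbers below are assumptions used only for illustration.\n",
    "PER_WORKER_BATCH_SIZE = 32  # assumed per-worker batch size\n",
    "NUM_WORKERS = 4             # 1 chief + 3 workers (matches tfc.run below)\n",
    "GLOBAL_BATCH_SIZE = PER_WORKER_BATCH_SIZE * NUM_WORKERS\n",
    "print('Example global batch size for distributed training:', GLOBAL_BATCH_SIZE)"
   ]
  },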
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "44CHwtcPTviV"
   },
   "source": [
    "## Start the remote training\n",
    "\n",
    "This step will prepare your code from this notebook for remote execution and start a distributed training job on Google Cloud Platform to train the model. Once the job is submitted, you can go to the next step to monitor the job's progress via TensorBoard.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "I4gSaGXgTviV",
    "trusted": true
   },
   "outputs": [],
   "source": [
    "if not tfc.remote():\n",
    "    print('Training on TensorFlow Cloud...')\n",
    "\n",
    "    # If you are using a custom image, you can install modules via a\n",
    "    # requirements.txt file.\n",
    "    with open('requirements.txt', 'w') as f:\n",
    "        f.write('tensorflow-cloud==0.1.12\\n')\n",
    "\n",
    "    # Optional: Some recommended base images. If you provide none, the system\n",
    "    # will choose one for you.\n",
    "    TF_GPU_IMAGE = \"tensorflow/tensorflow:latest-gpu\"\n",
    "    TF_CPU_IMAGE = \"tensorflow/tensorflow:latest\"\n",
    "\n",
    "    tfc.run(\n",
    "        distribution_strategy='auto',\n",
    "        requirements_txt='requirements.txt',\n",
    "        docker_config=tfc.DockerConfig(\n",
    "            parent_image=TF_GPU_IMAGE,\n",
    "            image_build_bucket=GCS_BUCKET\n",
    "        ),\n",
    "        chief_config=tfc.COMMON_MACHINE_CONFIGS['K80_1X'],\n",
    "        worker_config=tfc.COMMON_MACHINE_CONFIGS['K80_1X'],\n",
    "        worker_count=3,\n",
    "        job_labels={'job': JOB_NAME}\n",
    "    )"
   ]
  },
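  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Optional sketch (not part of the original example): once the remote job has\n",
    "# finished, the best checkpoint saved by ModelCheckpoint can be loaded back\n",
    "# from GCS and evaluated on the held-out test set. This assumes the job has\n",
    "# completed and this notebook still has access to MODEL_CHECKPOINT_DIR.\n",
    "if not tfc.remote():\n",
    "    trained_model = tf.keras.models.load_model(MODEL_CHECKPOINT_DIR)\n",
    "    loss, accuracy = trained_model.evaluate(x_test, y_test)\n",
    "    print(f'Test loss: {loss:.4f}, test accuracy: {accuracy:.4f}')"
   ]
  },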
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "fCN-XJCRTviV"
   },
   "source": [
    "## Training Results\n",
    "While the training is in progress, you can use TensorBoard to view the results."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "-dz-XpATTviV",
    "trusted": true
   },
   "outputs": [],
   "source": [
    "if not tfc.remote():\n",
    "\n",
    "    %load_ext tensorboard\n",
    "    %tensorboard --logdir {TENSORBOARD_LOGS_DIR}"
   ]
  }
 ],
 "metadata": {
  "colab": {
   "collapsed_sections": [],
   "name": "distributed-training-nasnet-with-tensorflow-cloud.ipynb",
   "provenance": [
    {
     "file_id": "1SRsTqmUqBJVWTyuteK7rmJtZthAn7Bth",
     "timestamp": 1612656396209
    }
   ]
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}

src/python/tensorflow_cloud/examples/google_cloud_project_setup_instructions.ipynb

Lines changed: 2 additions & 2 deletions
@@ -12,7 +12,7 @@
 "\u003ctable align=\"left\"\u003e\n",
 " \u003ctd\u003e\n",
 " \u003ca href=\"https://colab.research.google.com/github/tensorflow/cloud/blob/master/src/python/tensorflow_cloud/examples/google_cloud_project_setup_instructions.ipynb\"\u003e\n",
-" \u003cimg width=\"50\" src=\"https://cloud.google.com/ml-engine/images/colab-logo-32px.png\" alt=\"Colab logo\"\u003e Run in Colab\n",
+" \u003cimg width=\"50\" src=\"https://cloud.google.com/ml-engine/images/colab-logo-32px.png\" alt=\"Colab logo\"\u003eRun in Colab\n",
 " \u003c/a\u003e\n",
 " \u003c/td\u003e\n",
 " \u003ctd\u003e\n",
@@ -22,7 +22,7 @@
 " \u003c/td\u003e\n",
 " \u003ctd\u003e\n",
 " \u003ca href=\"https://www.kaggle.com/nitric/google-cloud-project-setup-instructions\"\u003e\n",
-" \u003cimg width=\"90\" src=\"https://www.kaggle.com/static/images/site-logo.png\" alt=\"Kaggle logo\"\u003eView on Kaggle\n",
+" \u003cimg width=\"90\" src=\"https://www.kaggle.com/static/images/site-logo.png\" alt=\"Kaggle logo\"\u003eRun in Kaggle\n",
 " \u003c/a\u003e\n",
 " \u003c/td\u003e\n",
 "\u003c/table\u003e\n"
