Commit cb521d1

Merge branch 'tensorflow:master' into patch-1
2 parents b795caa + 4223fe2

1 file changed: +26 −26 lines changed

site/en/tutorials/video/video_classification.ipynb

Lines changed: 26 additions & 26 deletions
@@ -97,7 +97,7 @@
 },
 "outputs": [],
 "source": [
-"!pip install remotezip tqdm opencv-python einops \n",
+"!pip install remotezip tqdm opencv-python einops\n",
 "# Install TensorFlow 2.10\n",
 "!pip install tensorflow==2.10.0"
 ]
@@ -156,7 +156,7 @@
 " List the files in each class of the dataset given the zip URL.\n",
 "\n",
 " Args:\n",
-" zip_url: URL from which the files can be unzipped. \n",
+" zip_url: URL from which the files can be unzipped.\n",
 "\n",
 " Return:\n",
 " files: List of files in each of the classes.\n",
@@ -181,7 +181,7 @@
 "\n",
 "def get_files_per_class(files):\n",
 " \"\"\"\n",
-" Retrieve the files that belong to each class. \n",
+" Retrieve the files that belong to each class.\n",
 "\n",
 " Args:\n",
 " files: List of files in the dataset.\n",
@@ -242,7 +242,7 @@
 " Args:\n",
 " zip_url: Zip URL containing data.\n",
 " num_classes: Number of labels.\n",
-" splits: Dictionary specifying the training, validation, test, etc. (key) division of data \n",
+" splits: Dictionary specifying the training, validation, test, etc. (key) division of data\n",
 " (value is number of files per split).\n",
 " download_dir: Directory to download data to.\n",
 "\n",
@@ -282,7 +282,7 @@
 " Pad and resize an image from a video.\n",
 " \n",
 " Args:\n",
-" frame: Image that needs to resized and padded. \n",
+" frame: Image that needs to resized and padded.\n",
 " output_size: Pixel size of the output frame image.\n",
 "\n",
 " Return:\n",
@@ -306,7 +306,7 @@
 " \"\"\"\n",
 " # Read each video frame by frame\n",
 " result = []\n",
-" src = cv2.VideoCapture(str(video_path)) \n",
+" src = cv2.VideoCapture(str(video_path))\n",
 "\n",
 " video_length = src.get(cv2.CAP_PROP_FRAME_COUNT)\n",
 "\n",
@@ -338,11 +338,11 @@
 "\n",
 "class FrameGenerator:\n",
 " def __init__(self, path, n_frames, training = False):\n",
-" \"\"\" Returns a set of frames with their associated label. \n",
+" \"\"\" Returns a set of frames with their associated label.\n",
 "\n",
 " Args:\n",
 " path: Video file paths.\n",
-" n_frames: Number of frames. \n",
+" n_frames: Number of frames.\n",
 " training: Boolean to determine if training dataset is being created.\n",
 " \"\"\"\n",
 " self.path = path\n",
@@ -365,7 +365,7 @@
 " random.shuffle(pairs)\n",
 "\n",
 " for path, name in pairs:\n",
-" video_frames = frames_from_video_file(path, self.n_frames) \n",
+" video_frames = frames_from_video_file(path, self.n_frames)\n",
 " label = self.class_ids_for_name[name] # Encode labels\n",
 " yield video_frames, label"
 ]
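
The `FrameGenerator` class touched in this hunk is consumed elsewhere in the notebook through `tf.data.Dataset.from_generator`. A minimal usage sketch, assuming (as in the published tutorial) that the generator loop shown above lives in the class's `__call__` method and that `subset_paths` comes from the download step below:

    import tensorflow as tf

    # Each yielded element is a (n_frames, height, width, 3) float tensor plus a scalar label.
    output_signature = (tf.TensorSpec(shape=(None, None, None, 3), dtype=tf.float32),
                        tf.TensorSpec(shape=(), dtype=tf.int16))

    train_ds = tf.data.Dataset.from_generator(
        FrameGenerator(subset_paths['train'], 10, training=True),
        output_signature=output_signature)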
@@ -380,8 +380,8 @@
 "source": [
 "URL = 'https://storage.googleapis.com/thumos14_files/UCF101_videos.zip'\n",
 "download_dir = pathlib.Path('./UCF101_subset/')\n",
-"subset_paths = download_ufc_101_subset(URL, \n",
-" num_classes = 10, \n",
+"subset_paths = download_ufc_101_subset(URL,\n",
+" num_classes = 10,\n",
 " splits = {\"train\": 30, \"val\": 10, \"test\": 10},\n",
 " download_dir = download_dir)"
 ]
@@ -447,7 +447,7 @@
 "\n",
 "![(2+1)D convolutions](https://www.tensorflow.org/images/tutorials/video/2plus1CNN.png)\n",
 "\n",
-"The main advantage of this approach is that it reduces the number of parameters. In the (2 + 1)D convolution the spatial convolution takes in data of the shape `(1, width, height)`, while the temporal convolution takes in data of the shape `(time, 1, 1)`. For example, a (2 + 1)D convolution with kernel size `(3 x 3 x 3)` would need weight matrices of size `(9 * channels**2) + (3 * channels**2)`, less than half as many as the full 3D convolution. This tutorial implements (2 + 1)D ResNet18, where each convolution in the resnet is replaced by a (2+1)D convolution."
+"The main advantage of this approach is that it reduces the number of parameters. In the (2 + 1)D convolution the spatial convolution takes in data of the shape `(1, width, height)`, while the temporal convolution takes in data of the shape `(time, 1, 1)`. For example, a (2 + 1)D convolution with kernel size `(3 x 3 x 3)` would need weight matrices of size `(9 * channels**2) + (3 * channels**2)`, less than half as many as the full 3D convolution. This tutorial implements (2 + 1)D ResNet18, where each convolution in the ResNet is replaced by a (2+1)D convolution."
 ]
 },
 {
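
The parameter arithmetic in the corrected paragraph is easy to verify. A small sketch in plain Python, assuming equal input and output channel counts (the simplification the paragraph itself makes):

    def conv3d_weights(k, channels):
        # A full k x k x k 3D convolution mixes channels at every kernel position.
        return (k ** 3) * channels ** 2

    def conv2plus1d_weights(k, channels):
        # Factorized: a (1, k, k) spatial convolution plus a (k, 1, 1) temporal one.
        return (k * k) * channels ** 2 + k * channels ** 2

    channels = 64
    print(conv3d_weights(3, channels))       # 27 * channels**2 = 110592
    print(conv2plus1d_weights(3, channels))  # 12 * channels**2 = 49152, less than half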
@@ -499,7 +499,7 @@
 "id": "I-fCAddqEORZ"
 },
 "source": [
-"A ResNet model resnet model is made from a sequence of residual blocks.\n",
+"A ResNet model is made from a sequence of residual blocks.\n",
 "A residual block has two branches. The main branch performs the calculatoion, but is difficult for gradients to flow through.\n",
 "The residual branch bypasses the main calculation and mostly just adds the input to the output of the main branch.\n",
 "Gradients flow easily through this branch.\n",
@@ -530,7 +530,7 @@
 " padding='same'),\n",
 " layers.LayerNormalization(),\n",
 " layers.ReLU(),\n",
-" Conv2Plus1D(filters=filters, \n",
+" Conv2Plus1D(filters=filters,\n",
 " kernel_size=kernel_size,\n",
 " padding='same'),\n",
 " layers.LayerNormalization()\n",
@@ -559,8 +559,8 @@
 "source": [
 "class Project(keras.layers.Layer):\n",
 " \"\"\"\n",
-" Project certain dimensions of the tensor as the data is passed through different \n",
-" sized filters and downsampled. \n",
+" Project certain dimensions of the tensor as the data is passed through different\n",
+" sized filters and downsampled.\n",
 " \"\"\"\n",
 " def __init__(self, units):\n",
 " super().__init__()\n",
@@ -595,9 +595,9 @@
 " Add residual blocks to the model. If the last dimensions of the input data\n",
 " and filter size does not match, project it such that last dimension matches.\n",
 " \"\"\"\n",
-" out = ResidualMain(filters, \n",
+" out = ResidualMain(filters,\n",
 " kernel_size)(input)\n",
-" \n",
+"\n",
 " res = input\n",
 " # Using the Keras functional APIs, project the last dimension of the tensor to\n",
 " # match the new filter size\n",
@@ -633,7 +633,7 @@
 "\n",
 " def call(self, video):\n",
 " \"\"\"\n",
-" Use the einops library to resize the tensor. \n",
+" Use the einops library to resize the tensor.\n",
 " \n",
 " Args:\n",
 " video: Tensor representation of the video, in the form of a set of frames.\n",
@@ -743,8 +743,8 @@
 },
 "outputs": [],
 "source": [
-"model.compile(loss = keras.losses.SparseCategoricalCrossentropy(from_logits=True), \n",
-" optimizer = keras.optimizers.Adam(learning_rate = 0.0001), \n",
+"model.compile(loss = keras.losses.SparseCategoricalCrossentropy(from_logits=True),\n",
+" optimizer = keras.optimizers.Adam(learning_rate = 0.0001),\n",
 " metrics = ['accuracy'])"
 ]
 },
@@ -813,7 +813,7 @@
 "\n",
 " ax1.set_ylim([0, np.ceil(max_loss)])\n",
 " ax1.set_xlabel('Epoch')\n",
-" ax1.legend(['Train', 'Validation']) \n",
+" ax1.legend(['Train', 'Validation'])\n",
 "\n",
 " # Plot accuracy\n",
 " ax2.set_title('Accuracy')\n",
@@ -837,7 +837,7 @@
 "source": [
 "## Evaluate the model\n",
 "\n",
-"Use Keras `Model.evaluate` to get the loss and accuracy on the test dataset. \n",
+"Use Keras `Model.evaluate` to get the loss and accuracy on the test dataset.\n",
 "\n",
 "Note: The example model in this tutorial uses a subset of the UCF101 dataset to keep training time reasonable. The accuracy and loss can be improved with further hyperparameter tuning or more training data. "
 ]
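
A one-line usage sketch of the call that cell describes, assuming the tutorial's `test_ds` pipeline; `return_dict=True` returns the metrics by name:

    # Returns e.g. {'loss': ..., 'accuracy': ...} on the held-out test split.
    model.evaluate(test_ds, return_dict=True)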
@@ -870,7 +870,7 @@
 },
 "outputs": [],
 "source": [
-"def get_actual_predicted_labels(dataset): \n",
+"def get_actual_predicted_labels(dataset):\n",
 " \"\"\"\n",
 " Create a list of actual ground truth values and the predictions from the model.\n",
 "\n",
@@ -968,7 +968,7 @@
 "def calculate_classification_metrics(y_actual, y_pred, labels):\n",
 " \"\"\"\n",
 " Calculate the precision and recall of a classification model using the ground truth and\n",
-" predicted values. \n",
+" predicted values.\n",
 "\n",
 " Args:\n",
 " y_actual: Ground truth labels.\n",
@@ -989,7 +989,7 @@
 " row = cm[i, :]\n",
 " fn = np.sum(row) - tp[i] # Sum of row minus true positive, is false negative\n",
 " \n",
-" precision[labels[i]] = tp[i] / (tp[i] + fp) # Precision \n",
+" precision[labels[i]] = tp[i] / (tp[i] + fp) # Precision\n",
 " \n",
 " recall[labels[i]] = tp[i] / (tp[i] + fn) # Recall\n",
 " \n",

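To make the row and column bookkeeping in that last hunk concrete, here is a self-contained check with a hypothetical 2x2 confusion matrix (rows are actual labels, columns are predictions, as in the notebook):

    import numpy as np

    cm = np.array([[8, 2],
                   [1, 9]])          # Hypothetical counts, not tutorial output
    tp = np.diag(cm)                 # True positives sit on the diagonal

    for i, label in enumerate(['class_a', 'class_b']):
        fp = np.sum(cm[:, i]) - tp[i]  # Column sum minus TP = false positives
        fn = np.sum(cm[i, :]) - tp[i]  # Row sum minus TP = false negatives
        print(label,
              'precision:', tp[i] / (tp[i] + fp),   # 8/9, 9/11
              'recall:', tp[i] / (tp[i] + fn))      # 8/10, 9/10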