Skip to content

Commit bfb373d

Browse files
shilpakancharla and copybara-github
authored and committed
Add Video classification with 3D CNN tutorial
PiperOrigin-RevId: 482618613
1 parent 4a7c942 commit bfb373d

File tree

3 files changed

+1077
-14
lines changed

3 files changed

+1077
-14
lines changed

site/en/tutorials/_toc.yaml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ toc:
9797
- title: "Distributed input"
9898
path: /tutorials/distribute/input
9999

100-
- title: "Images"
100+
- title: "Vision"
101101
style: accordion
102102
section:
103103
- title: "Convolutional Neural Network"
@@ -115,6 +115,9 @@ toc:
115115
- title: "Object detection with TF Hub"
116116
path: /hub/tutorials/tf2_object_detection
117117
status: external
118+
- title: "Video classification"
119+
status: new
120+
path: /tutorials/video/video_classification
118121

119122
- title: "Text"
120123
style: accordion

site/en/tutorials/load_data/video.ipynb

Lines changed: 38 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -239,7 +239,7 @@
239239
"outputs": [],
240240
"source": [
241241
"def get_files_per_class(files):\n",
242-
" \"\"\" Retrieve the files that belong to each class. \n",
242+
" \"\"\" Retrieve the files that belong to each class.\n",
243243
"\n",
244244
" Args:\n",
245245
" files: List of files in the dataset.\n",
@@ -553,10 +553,34 @@
553553
"id": "D1vvyT0F7JAZ"
554554
},
555555
"source": [
556-
"The following function splits the videos into frames, reads a randomly chosen span of `n_frames` out of a video file, and returns them as a NumPy `array`.\n",
556+
"The `frames_from_video_file` function splits the videos into frames, reads a randomly chosen span of `n_frames` out of a video file, and returns them as a NumPy `array`.\n",
557557
"To reduce memory and computation overhead, choose a **small** number of frames. In addition, pick the **same** number of frames from each video, which makes it easier to work on batches of data.\n"
558558
]
559559
},
560+
{
561+
"cell_type": "code",
562+
"execution_count": null,
563+
"metadata": {
564+
"id": "vNBCiV3bMzpD"
565+
},
566+
"outputs": [],
567+
"source": [
568+
"def format_frames(frame, output_size):\n",
569+
" \"\"\"\n",
570+
" Pad and resize an image from a video.\n",
571+
" \n",
572+
" Args:\n",
573+
" frame: Image that needs to resized and padded. \n",
574+
" output_size: Pixel size of the output frame image.\n",
575+
"\n",
576+
" Return:\n",
577+
" Formatted frame with padding of specified output size.\n",
578+
" \"\"\"\n",
579+
" frame = tf.image.convert_image_dtype(frame, tf.float32)\n",
580+
" frame = tf.image.resize_with_pad(frame, *output_size)\n",
581+
" return frame"
582+
]
583+
},
560584
{
561585
"cell_type": "code",
562586
"execution_count": null,
@@ -565,8 +589,9 @@
565589
},
566590
"outputs": [],
567591
"source": [
568-
"def frames_from_video_file(video_path, n_frames, output_size = (224,224)):\n",
569-
" \"\"\" Creates frames from each video file present for each category.\n",
592+
"def frames_from_video_file(video_path, n_frames, output_size = (224,224), frame_step = 15):\n",
593+
" \"\"\"\n",
594+
" Creates frames from each video file present for each category.\n",
570595
"\n",
571596
" Args:\n",
572597
" video_path: File path to the video.\n",
@@ -576,32 +601,32 @@
576601
" Return:\n",
577602
"      A NumPy array of frames in the shape of (n_frames, height, width, channels).\n",
578603
" \"\"\"\n",
579-
" # Read each frame by frame\n",
604+
" # Read each video frame by frame\n",
580605
" result = []\n",
581606
" src = cv2.VideoCapture(str(video_path)) \n",
582607
"\n",
583608
" video_length = src.get(cv2.CAP_PROP_FRAME_COUNT)\n",
584609
"\n",
585-
" # If the number of frames wanted is greater than the length of the video, then start from beginning\n",
586-
" if n_frames > video_length:\n",
610+
" need_length = 1 + (n_frames - 1) * frame_step\n",
611+
"\n",
612+
" if need_length > video_length:\n",
587613
" start = 0\n",
588614
" else:\n",
589-
" # Otherwise, start at another random point within the video\n",
590-
" max_start = video_length - n_frames\n",
591-
" start = random.randint(0, max_start)\n",
615+
" max_start = video_length - need_length\n",
616+
" start = random.randint(0, max_start + 1)\n",
592617
"\n",
593618
" src.set(cv2.CAP_PROP_POS_FRAMES, start)\n",
619+
" ret, frame = src.read()\n",
620+
" result.append(format_frames(frame, output_size))\n",
594621
"\n",
595-
" for _ in range(n_frames):\n",
596-
" ret, frame = src.read()\n",
622+
" for _ in range(n_frames - 1):\n",
597623
" if ret:\n",
598624
" frame = tf.image.convert_image_dtype(frame, tf.float32)\n",
599625
" frame = tf.image.resize_with_pad(frame, *output_size)\n",
600626
" result.append(frame)\n",
601627
" else:\n",
602628
" result.append(np.zeros_like(result[0]))\n",
603629
" src.release()\n",
604-
" # Ensure that the color scheme is not inverted\n",
605630
" result = np.array(result)[..., [2, 1, 0]]\n",
606631
"\n",
607632
" return result"

0 commit comments

Comments
 (0)