|
239 | 239 | "outputs": [],
240 | 240 | "source": [
241 | 241 | "def get_files_per_class(files):\n",
242 | | - " \"\"\" Retrieve the files that belong to each class. \n", |
| 242 | + " \"\"\" Retrieve the files that belong to each class.\n", |
243 | 243 | "\n",
244 | 244 | " Args:\n",
245 | 245 | " files: List of files in the dataset.\n",
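For reference, `get_files_per_class` groups the file list by the class encoded in each filename. Its body is outside this hunk; below is a minimal sketch of such a grouping, where the `get_class` helper and the UCF101-style `v_ClassName_g##_c##.avi` naming pattern are assumptions rather than content of this diff:

```python
import collections

def get_class(fname):
  # Assumed helper: UCF101 names look like 'v_ApplyEyeMakeup_g01_c01.avi',
  # so the class name is the third-to-last underscore-separated field.
  return fname.split('_')[-3]

def get_files_per_class(files):
  """ Retrieve the files that belong to each class. """
  files_for_class = collections.defaultdict(list)
  for fname in files:
    files_for_class[get_class(fname)].append(fname)
  return files_for_class
```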
|
|
553 | 553 | "id": "D1vvyT0F7JAZ"
554 | 554 | },
555 | 555 | "source": [
556 | | - "The following function splits the videos into frames, reads a randomly chosen span of `n_frames` out of a video file, and returns them as a NumPy `array`.\n", |
| 556 | + "The `frames_from_video_file` function splits the videos into frames, reads a randomly chosen span of `n_frames` out of a video file, and returns them as a NumPy `array`.\n", |
557 | 557 | "To reduce memory and computation overhead, choose a **small** number of frames. In addition, pick the **same** number of frames from each video, which makes it easier to work on batches of data.\n"
558 | 558 | ]
559 | 559 | },
| 560 | + { |
| 561 | + "cell_type": "code", |
| 562 | + "execution_count": null, |
| 563 | + "metadata": { |
| 564 | + "id": "vNBCiV3bMzpD" |
| 565 | + }, |
| 566 | + "outputs": [], |
| 567 | + "source": [ |
| 568 | + "def format_frames(frame, output_size):\n", |
| 569 | + " \"\"\"\n", |
| 570 | + " Pad and resize an image from a video.\n", |
| 571 | + " \n", |
| 572 | + " Args:\n", |
| 573 | + " frame: Image that needs to be resized and padded. \n", |
| 574 | + " output_size: Pixel size of the output frame image.\n", |
| 575 | + "\n", |
| 576 | + " Return:\n", |
| 577 | + " Formatted frame with padding of specified output size.\n", |
| 578 | + " \"\"\"\n", |
| 579 | + " frame = tf.image.convert_image_dtype(frame, tf.float32)\n", |
| 580 | + " frame = tf.image.resize_with_pad(frame, *output_size)\n", |
| 581 | + " return frame" |
| 582 | + ] |
| 583 | + }, |
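The new `format_frames` cell combines two TensorFlow ops: `tf.image.convert_image_dtype` rescales `uint8` pixels to floats in `[0, 1]`, and `tf.image.resize_with_pad` shrinks the frame to fit `output_size` while preserving its aspect ratio, padding the remainder with zeros. A quick illustrative check, assuming `format_frames` from the cell above is defined (the dummy frame is made up):

```python
import numpy as np
import tensorflow as tf

# Fake 360x640 BGR frame, like one returned by cv2.VideoCapture.read().
dummy_frame = np.random.randint(0, 255, size=(360, 640, 3), dtype=np.uint8)

formatted = format_frames(dummy_frame, output_size=(224, 224))
print(formatted.shape, formatted.dtype)  # (224, 224, 3) <dtype: 'float32'>
```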
560 | 584 | {
561 | 585 | "cell_type": "code",
562 | 586 | "execution_count": null,

565 | 589 | },
566 | 590 | "outputs": [],
567 | 591 | "source": [
568 | | - "def frames_from_video_file(video_path, n_frames, output_size = (224,224)):\n", |
569 | | - " \"\"\" Creates frames from each video file present for each category.\n", |
| 592 | + "def frames_from_video_file(video_path, n_frames, output_size = (224,224), frame_step = 15):\n", |
| 593 | + " \"\"\"\n", |
| 594 | + " Creates frames from each video file present for each category.\n", |
570 | 595 | "\n",
571 | 596 | " Args:\n",
572 | 597 | " video_path: File path to the video.\n",

576 | 601 | " Return:\n",
577 | 602 | " A NumPy array of frames in the shape of (n_frames, height, width, channels).\n",
578 | 603 | " \"\"\"\n",
579 | | - " # Read each frame by frame\n", |
| 604 | + " # Read each video frame by frame\n", |
580 | 605 | " result = []\n",
581 | 606 | " src = cv2.VideoCapture(str(video_path)) \n",
582 | 607 | "\n",
583 | 608 | " video_length = src.get(cv2.CAP_PROP_FRAME_COUNT)\n",
584 | 609 | "\n",
585 | | - " # If the number of frames wanted is greater than the length of the video, then start from beginning\n", |
586 | | - " if n_frames > video_length:\n", |
| 610 | + " need_length = 1 + (n_frames - 1) * frame_step\n", |
| 611 | + "\n", |
| 612 | + " if need_length > video_length:\n", |
587 | 613 | " start = 0\n",
588 | 614 | " else:\n",
589 | | - " # Otherwise, start at another random point within the video\n", |
590 | | - " max_start = video_length - n_frames\n", |
591 | | - " start = random.randint(0, max_start)\n", |
| 615 | + " max_start = video_length - need_length\n", |
| 616 | + " start = random.randint(0, max_start + 1)\n", |
592 | 617 | "\n",
593 | 618 | " src.set(cv2.CAP_PROP_POS_FRAMES, start)\n",
| 619 | + " ret, frame = src.read()\n", |
| 620 | + " result.append(format_frames(frame, output_size))\n", |
594 | 621 | "\n",
595 | | - " for _ in range(n_frames):\n", |
596 | | - " ret, frame = src.read()\n", |
| 622 | + " for _ in range(n_frames - 1):\n", |
597 | 623 | " if ret:\n",
598 | 624 | " frame = tf.image.convert_image_dtype(frame, tf.float32)\n",
599 | 625 | " frame = tf.image.resize_with_pad(frame, *output_size)\n",
600 | 626 | " result.append(frame)\n",
601 | 627 | " else:\n",
602 | 628 | " result.append(np.zeros_like(result[0]))\n",
603 | 629 | " src.release()\n",
604 | | - " # Ensure that the color scheme is not inverted\n", |
605 | 630 | " result = np.array(result)[..., [2, 1, 0]]\n",
606 | 631 | "\n",
607 | 632 | " return result"
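The new `frame_step` parameter spaces the sampled frames apart instead of taking them back to back: with the defaults, `n_frames = 10` and `frame_step = 15` need `1 + (10 - 1) * 15 = 136` frames of footage, which is what `need_length` checks against `video_length` before picking a random start. Note that in the loop shown in this hunk nothing reads a new frame from the capture, so `frame_step` is not yet applied between saved frames. Below is a sketch of one way to complete the stride sampling; it is illustrative only, the function name `sample_frames_with_step` is made up, and it reuses `format_frames` from the cell added above:

```python
import random

import cv2
import numpy as np

def sample_frames_with_step(video_path, n_frames, output_size=(224, 224), frame_step=15):
  """Illustrative variant: sample n_frames spaced frame_step apart from a random start."""
  result = []
  src = cv2.VideoCapture(str(video_path))
  video_length = src.get(cv2.CAP_PROP_FRAME_COUNT)

  # Footage needed when frames are taken every frame_step frames.
  need_length = 1 + (n_frames - 1) * frame_step
  if need_length > video_length:
    start = 0
  else:
    start = random.randint(0, int(video_length - need_length))
  src.set(cv2.CAP_PROP_POS_FRAMES, start)

  # Keep the first frame of the clip.
  ret, frame = src.read()
  result.append(format_frames(frame, output_size))  # format_frames from the cell above

  for _ in range(n_frames - 1):
    # Advance the capture by frame_step frames before keeping the next one.
    for _ in range(frame_step):
      ret, frame = src.read()
    if ret:
      result.append(format_frames(frame, output_size))
    else:
      # Pad with black frames if the video runs out early.
      result.append(np.zeros_like(result[0]))
  src.release()

  # OpenCV decodes frames as BGR; reorder the channels to RGB.
  return np.array(result)[..., [2, 1, 0]]
```

Either version returns a fixed-size array that is easy to batch; a quick check on a downloaded clip (the path here is a placeholder) would look like:

```python
frames = frames_from_video_file('some_video.avi', n_frames=10)
print(frames.shape)  # (10, 224, 224, 3)
```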
|
|