|
59 | 59 | "id": "L2MHy42s5wl6"
|
60 | 60 | },
|
61 | 61 | "source": [
|
62 |
| - "# Video classification with a 3D convolutional neural network (CNN)\n", |
| 62 | + "# Video classification with a 3D convolutional neural network\n", |
63 | 63 | "\n",
|
64 |
| - "This tutorial demonstrates training a 3D convolutional neural network for video classification using the [UCF101](https://www.crcv.ucf.edu/data/UCF101.php) action recognition dataset. A 3D CNN uses a three dimensional filter to perform convolutions. The kernel is able to slide in three directions, whereas in a 2D CNN it can slide in two dimensions. The model is based on the work published in [A Closer Look at Spatiotemporal Convolutions for Action Recognition](https://arxiv.org/abs/1711.11248v3) by D. Tran et al. (2017). In this tutorial, you will: \n", |
| 64 | + "This tutorial demonstrates training a 3D convolutional neural network (CNN) for video classification using the [UCF101](https://www.crcv.ucf.edu/data/UCF101.php) action recognition dataset. A 3D CNN uses a three-dimensional filter to perform convolutions. The kernel is able to slide in three directions, whereas in a 2D CNN it can slide in two dimensions. The model is based on the work published in [A Closer Look at Spatiotemporal Convolutions for Action Recognition](https://arxiv.org/abs/1711.11248v3) by D. Tran et al. (2017). In this tutorial, you will:\n", |
65 | 65 | "\n",
|
66 | 66 | "* Build an input pipeline\n",
|
67 | 67 | "* Build a 3D convolutional neural network model with residual connections using Keras functional API\n",
|
68 | 68 | "* Train the model\n",
|
69 |
| - "* Evaluate and test the model \n", |
| 69 | + "* Evaluate and test the model\n", |
70 | 70 | "\n",
|
71 | 71 | "This video classification tutorial is the second part in a series of TensorFlow video tutorials. Here are the other three tutorials:\n",
|
72 | 72 | "\n",
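
To make the kernel-sliding comparison above concrete, here is a minimal sketch (not part of this commit) contrasting `Conv3D` and `Conv2D` on a toy video tensor; the shapes and layer arguments are illustrative assumptions, not code from the notebook:

```python
import tensorflow as tf

# Toy batch of videos: (batch, frames, height, width, channels).
video = tf.random.normal((1, 10, 64, 64, 3))

# A 3D kernel slides along time, height, and width.
conv3d = tf.keras.layers.Conv3D(filters=8, kernel_size=3, padding='same')
print(conv3d(video).shape)   # (1, 10, 64, 64, 8)

# A 2D kernel on a single frame slides only along height and width.
frame = video[:, 0]          # (1, 64, 64, 3)
conv2d = tf.keras.layers.Conv2D(filters=8, kernel_size=3, padding='same')
print(conv2d(frame).shape)   # (1, 64, 64, 8)
```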
|
|
378 | 378 | "download_dir = pathlib.Path('./UCF101_subset/')\n",
|
379 | 379 | "subset_paths = download_ufc_101_subset(URL, \n",
|
380 | 380 | " num_classes = 10, \n",
|
381 |
| - " splits = {\"train\": 30, \"val\": 10, \"test\": 10}, \n", |
| 381 | + " splits = {\"train\": 30, \"val\": 10, \"test\": 10},\n", |
382 | 382 | " download_dir = download_dir)"
|
383 | 383 | ]
|
384 | 384 | },
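
The helper's definition is outside this diff, but the later `subset_paths['train']` lookups suggest that `download_ufc_101_subset` returns a dict mapping split names to directories, presumably something like:

```python
# Assumed return shape (the helper's body is not shown in this diff):
# {'train': PosixPath('UCF101_subset/train'),
#  'val':   PosixPath('UCF101_subset/val'),
#  'test':  PosixPath('UCF101_subset/test')}
```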
|
|
400 | 400 | },
|
401 | 401 | "outputs": [],
|
402 | 402 | "source": [
|
| 403 | + "n_frames = 10\n", |
| 404 | + "batch_size = 8\n", |
| 405 | + "\n", |
403 | 406 | "output_signature = (tf.TensorSpec(shape = (None, None, None, 3), dtype = tf.float32),\n",
|
404 | 407 | " tf.TensorSpec(shape = (), dtype = tf.int16))\n",
|
405 |
| - "train_ds = tf.data.Dataset.from_generator(FrameGenerator(subset_paths['train'], 10, training = True),\n", |
| 408 | + "\n", |
| 409 | + "train_ds = tf.data.Dataset.from_generator(FrameGenerator(subset_paths['train'], n_frames, training=True),\n", |
406 | 410 | " output_signature = output_signature)\n",
|
407 | 411 | "\n",
|
| 412 | + "\n", |
408 | 413 | "# Batch the data\n",
|
409 |
| - "train_ds = train_ds.batch(8)\n", |
| 414 | + "train_ds = train_ds.batch(batch_size)\n", |
410 | 415 | "\n",
|
411 |
| - "val_ds = tf.data.Dataset.from_generator(FrameGenerator(subset_paths['val'], 10),\n", |
| 416 | + "val_ds = tf.data.Dataset.from_generator(FrameGenerator(subset_paths['val'], n_frames),\n", |
412 | 417 | " output_signature = output_signature)\n",
|
413 |
| - "val_ds = val_ds.batch(8)\n", |
| 418 | + "val_ds = val_ds.batch(batch_size)\n", |
414 | 419 | "\n",
|
415 |
| - "test_ds = tf.data.Dataset.from_generator(FrameGenerator(subset_paths['test'], 10),\n", |
| 420 | + "test_ds = tf.data.Dataset.from_generator(FrameGenerator(subset_paths['test'], n_frames),\n", |
416 | 421 | " output_signature = output_signature)\n",
|
417 | 422 | "\n",
|
418 |
| - "test_ds = test_ds.batch(8)" |
| 423 | + "test_ds = test_ds.batch(batch_size)" |
419 | 424 | ]
|
420 | 425 | },
|
421 | 426 | {
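
A quick way to check that the pipeline above is wired correctly is to pull one batch and inspect its shapes. This is a hedged sketch assuming the cell above has run; `frames_batch` and `label_batch` are illustrative names, not identifiers from the notebook:

```python
# Frames: (batch_size, n_frames, height, width, channels); labels: (batch_size,).
frames_batch, label_batch = next(iter(train_ds))
print(f'Frames batch shape: {frames_batch.shape}')
print(f'Label batch shape: {label_batch.shape}')
```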
|
|
898 | 903 | " ax.set_xlabel('Predicted Action')\n",
|
899 | 904 | " ax.set_ylabel('Actual Action')\n",
|
900 | 905 | " plt.xticks(rotation=90)\n",
|
901 |
| - " plt.yticks(rotation=0) \n", |
| 906 | + " plt.yticks(rotation=0)\n", |
902 | 907 | " ax.xaxis.set_ticklabels(labels)\n",
|
903 | 908 | " ax.yaxis.set_ticklabels(labels)"
|
904 | 909 | ]
|
|
911 | 916 | },
|
912 | 917 | "outputs": [],
|
913 | 918 | "source": [
|
914 |
| - "fg = FrameGenerator(subset_paths['train'], num_frames, training = True)\n", |
915 |
| - "label_names = list(fg.class_ids_for_name.keys())" |
| 919 | + "fg = FrameGenerator(subset_paths['train'], n_frames, training=True)\n", |
| 920 | + "labels = list(fg.class_ids_for_name.keys())" |
916 | 921 | ]
|
917 | 922 | },
|
918 | 923 | {
|
|
945 | 950 | "id": "FefzeIZz-9aI"
|
946 | 951 | },
|
947 | 952 | "source": [
|
948 |
| - "The precision and recall values for each class can also be calculated using a confusion matrix. " |
| 953 | + "The precision and recall values for each class can also be calculated using a confusion matrix." |
949 | 954 | ]
|
950 | 955 | },
|
951 | 956 | {
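
The notebook's metric code is outside this hunk; as a minimal sketch of the idea, assuming a confusion matrix laid out like `tf.math.confusion_matrix` (rows are actual classes, columns are predicted classes), per-class precision and recall fall out of the column and row sums:

```python
import tensorflow as tf

def precision_recall_from_cm(cm):
  """Per-class precision and recall from a square confusion matrix."""
  cm = tf.cast(cm, tf.float32)
  tp = tf.linalg.diag_part(cm)                # True positives per class.
  precision = tp / tf.reduce_sum(cm, axis=0)  # TP / (TP + FP): column sums.
  recall = tp / tf.reduce_sum(cm, axis=1)     # TP / (TP + FN): row sums.
  return precision, recall

# Hypothetical usage: cm = tf.math.confusion_matrix(actual, predicted)
# precision, recall = precision_recall_from_cm(cm)
```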
|
|
1026 | 1031 | "id": "d4WsP4Z2HZ6L"
|
1027 | 1032 | },
|
1028 | 1033 | "source": [
|
1029 |
| - "## Next Steps\n", |
| 1034 | + "## Next steps\n", |
1030 | 1035 | "\n",
|
1031 | 1036 | "To learn more about working with video data in TensorFlow, check out the following tutorials:\n",
|
1032 | 1037 | "\n",
|
|