8
8
9
9
10
10
class HMDB51 (VisionDataset ):
11
+ """
12
+ `HMDB51 <http://serre-lab.clps.brown.edu/resource/hmdb-a-large-human-motion-database/>`_
13
+ dataset.
14
+
15
+ HMDB51 is an action recognition video dataset.
16
+ This dataset considers every video as a collection of video clips of fixed size, specified
17
+ by ``frames_per_clip``, where the step in frames between each clip is given by
18
+ ``step_between_clips``.
19
+
20
+ To give an example, for 2 videos with 10 and 15 frames respectively, if ``frames_per_clip=5``
21
+ and ``step_between_clips=5``, the dataset size will be (2 + 3) = 5, where the first two
22
+ elements will come from video 1, and the next three elements from video 2.
23
+ Note that we drop clips which do not have exactly ``frames_per_clip`` elements, so not all
24
+ frames in a video might be present.
25
+
26
+ Internally, it uses a VideoClips object to handle clip creation.
27
+
28
+ Args:
29
+ root (str): Root directory of the HMDB51 Dataset.
30
+ annotation_path (str): path to the folder containing the split files
31
+ frames_per_clip (int): number of frames in a clip.
32
+ step_between_clips (int): number of frames between each clip.
33
+ fold (int, optional): which fold to use. Should be between 1 and 3.
34
+ train (bool, optional): if ``True``, creates a dataset from the train split,
35
+ otherwise from the ``test`` split.
36
+ transform (callable, optional): A function/transform that takes in a TxHxWxC video
37
+ and returns a transformed version.
38
+
39
+ Returns:
40
+ video (Tensor[T, H, W, C]): the `T` video frames
41
+ audio (Tensor[K, L]): the audio frames, where `K` is the number of channels
42
+ and `L` is the number of points
43
+ label (int): class of the video clip
44
+ """
11
45
12
46
data_url = "http://serre-lab.clps.brown.edu/wp-content/uploads/2013/10/hmdb51_org.rar"
13
47
splits = {
@@ -16,8 +50,11 @@ class HMDB51(VisionDataset):
16
50
}
17
51
18
52
def __init__ (self , root , annotation_path , frames_per_clip , step_between_clips = 1 ,
19
- fold = 1 , train = True ):
53
+ fold = 1 , train = True , transform = None ):
20
54
super (HMDB51 , self ).__init__ (root )
55
+ if not 1 <= fold <= 3 :
56
+ raise ValueError ("fold should be between 1 and 3, got {}" .format (fold ))
57
+
21
58
extensions = ('avi' ,)
22
59
self .fold = fold
23
60
self .train = train
@@ -30,6 +67,7 @@ def __init__(self, root, annotation_path, frames_per_clip, step_between_clips=1,
30
67
video_clips = VideoClips (video_list , frames_per_clip , step_between_clips )
31
68
indices = self ._select_fold (video_list , annotation_path , fold , train )
32
69
self .video_clips = video_clips .subset (indices )
70
+ self .transform = transform
33
71
34
72
def _select_fold (self , video_list , annotation_path , fold , train ):
35
73
target_tag = 1 if train else 2
@@ -53,4 +91,7 @@ def __getitem__(self, idx):
53
91
video , audio , info , video_idx = self .video_clips .get_clip (idx )
54
92
label = self .samples [video_idx ][1 ]
55
93
94
+ if self .transform is not None :
95
+ video = self .transform (video )
96
+
56
97
return video , audio , label
0 commit comments