-
Notifications
You must be signed in to change notification settings - Fork 17
Expand file tree
/
Copy pathinput_functions.py
More file actions
133 lines (115 loc) · 4.5 KB
/
input_functions.py
File metadata and controls
133 lines (115 loc) · 4.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
import json
import numpy as np
import os
import tensorflow as tf
# Handle to the TensorFlow command-line flag registry. The code in this module
# reads FLAGS.feat_dim, FLAGS.data_dir and FLAGS.bucket_span; none of them is
# defined in the visible part of this file (presumably the training script
# defines them -- confirm).
FLAGS = tf.flags.FLAGS
def sample(query_id, query_gt, query_label, query_loc, all_ids, all_gts,
           all_labels, all_locs, is_training):
  """Pair a query video with one randomly drawn reference video.

  A video is a candidate reference when it has the same label as the query
  and its clip (`all_locs[:, 1] - all_locs[:, 0]`) is at least as long as the
  query's action segment (`query_gt[1] - query_gt[0]`). One candidate is drawn
  uniformly at random. In training mode the chosen clip is additionally
  cropped at random (with probability 0.9) without cutting into its action
  segment.

  Args:
    query_id: Query video id.
    query_gt: The action segment in the query video, as [start, end].
    query_label: The action category of the query video.
    query_loc: The location of the query clip in the original whole video.
    all_ids: All video ids.
    all_gts: The action segments in all the videos.
    all_labels: The action categories of all the videos.
    all_locs: The locations of all the clips in their corresponding original
      whole videos.
    is_training: Python bool; enables the random crop and leaves the random
      pick unseeded. Otherwise the pick uses the fixed op seed 6.

  Returns:
    A 5-tuple:
      query_id: Query video id, unchanged.
      query_gt + query_loc[0]: The query action segment shifted by the start
        of the query clip.
      chosen_id: The reference video id.
      chosen_gt: The action segment in the (possibly cropped) reference clip.
      chosen_loc: The location of the (possibly cropped) reference clip in
        its original whole video.
  """
  label_match = tf.equal(all_labels, query_label)
  query_span = query_gt[1] - query_gt[0]
  clip_spans = all_locs[:, 1] - all_locs[:, 0]
  long_enough = tf.less_equal(query_span, clip_spans)

  # Row indices of every admissible reference video.
  candidates = tf.where(tf.logical_and(label_match, long_enough))
  num_candidates = tf.shape(candidates)[0]

  pick_seed = None if is_training else 6
  pick = tf.random_uniform([], maxval=num_candidates, dtype=tf.int32,
                           seed=pick_seed)
  chosen = candidates[pick, 0]

  chosen_id = all_ids[chosen]
  chosen_gt = all_gts[chosen]
  chosen_loc = all_locs[chosen]

  # Data augmentation during training: trim frames from the head and the tail
  # of the reference clip, never past the boundaries of its action segment.
  if is_training:
    head_limit = chosen_gt[0] + 1
    head_draw = tf.random_uniform([], maxval=head_limit, dtype=tf.int32)
    tail_limit = chosen_loc[1] - chosen_loc[0] - chosen_gt[1] + 1
    tail_draw = tf.random_uniform([], maxval=tail_limit, dtype=tf.int32)

    # With probability 0.1 the clip is left untouched.
    coin = tf.random_uniform([])
    head_cut = tf.cond(coin < 0.9, lambda: head_draw, lambda: 0)
    tail_cut = tf.cond(coin < 0.9, lambda: tail_draw, lambda: 0)

    # Removing `head_cut` leading frames shifts the segment left by the same
    # amount; the clip start moves forward and the clip end moves backward.
    gt_shift = tf.stack([-head_cut, -head_cut])
    loc_shift = tf.stack([head_cut, -tail_cut])
    chosen_gt = chosen_gt + gt_shift
    chosen_loc = chosen_loc + loc_shift

  return query_id, query_gt + query_loc[0], chosen_id, chosen_gt, chosen_loc
def batching_func(x, batch_size):
  """Batch a dataset of 5-tuples, padding the variable-length features.

  Args:
    x: Dataset whose elements are (features, length, features, length,
      segment) tuples.
    batch_size: Number of elements per batch.

  Returns:
    The dataset produced by `x.padded_batch`, where both feature components
    are padded along their first axis and have `FLAGS.feat_dim` columns.
  """
  feature_shape = tf.TensorShape([None, FLAGS.feat_dim])
  scalar_shape = tf.TensorShape([])
  segment_shape = tf.TensorShape([2])
  shapes = (feature_shape, scalar_shape, feature_shape, scalar_shape,
            segment_shape)
  return x.padded_batch(batch_size, padded_shapes=shapes)
def input_fn(subset, batch_size):
  """Build the input pipeline for one data split.

  Reads the annotations from `data/<subset>.json`, pairs every query video
  with a sampled reference video, loads their features through `get_data`
  and batches the result. Training mode (`subset == 'train'`) repeats and
  shuffles the data and groups examples into buckets by reference length so
  that each batch holds references of similar length.

  Args:
    subset: Name of the split; also the stem of the annotation file.
    batch_size: Number of examples per batch.

  Returns:
    A `tf.data.Dataset` of `((v1, l1, v2, l2), label)` batches as produced by
    `reorder_func`, prefetching 4 batches.
  """
  is_training = subset == 'train'

  annotation_path = os.path.join('data', subset + '.json')
  with open(annotation_path, 'r') as f:
    records = json.load(f)

  # One column per annotation field, in the order `sample` expects them.
  field_names = ('id', 'groundtruth', 'label', 'location')
  raw_columns = [[record[name] for record in records] for name in field_names]
  columns = [tf.convert_to_tensor(column) for column in raw_columns]

  dataset = tf.data.Dataset.from_tensor_slices(tuple(columns))
  if is_training:
    dataset = dataset.repeat()
    dataset = dataset.shuffle(1024)

  def pick_reference(query_id, query_gt, query_label, query_loc):
    # Every query is matched against the full set of videos.
    return sample(query_id, query_gt, query_label, query_loc, *columns,
                  is_training=is_training)

  def load_features(v1, t1, v2, t2, l2):
    # Feature files are read in Python, outside the TensorFlow graph.
    outputs = tf.py_func(
        get_data,
        [FLAGS.data_dir, v1, t1, v2, t2, l2],
        [tf.float32, tf.int32, tf.float32, tf.int32, tf.int32])
    return tuple(outputs)

  dataset = dataset.map(pick_reference)
  dataset = dataset.map(load_features)

  if is_training:
    def bucket_of(unused_1, len1, unused_2, len2, unused_3):
      # Bucket id is the reference length divided by the bucket span.
      return tf.to_int64(len2 // FLAGS.bucket_span)

    def batch_window(unused_key, windowed_data):
      return batching_func(windowed_data, batch_size)

    bucketing = tf.contrib.data.group_by_window(
        key_func=bucket_of, reduce_func=batch_window, window_size=batch_size)
    batched_dataset = dataset.apply(bucketing)
  else:
    batched_dataset = batching_func(dataset, batch_size)

  return batched_dataset.map(reorder_func).prefetch(4)
def _to_text(value):
  """Return `value` as text, decoding it as UTF-8 when it is a bytes object."""
  if isinstance(value, bytes):
    return value.decode('utf-8')
  return value


def get_data(data_dir, v1, t1, v2, t2, l2):
  """Read the video features of a query/reference pair.

  Feature files are expected at `<data_dir>/feat/v_<video id>.npy`.

  Args:
    data_dir: Root directory of the data, as `str` or `bytes`.
    v1: Query video id, as `str` or `bytes`.
    t1: [start, end] rows to cut out of the query feature file.
    v2: Reference video id, as `str` or `bytes`.
    t2: Action segment of the reference; must lie within [0, len2].
    l2: [start, end] rows to cut out of the reference feature file.

  Returns:
    A 5-tuple `(ret1, len1, ret2, len2, t2)`: the query feature slice and its
    length, the reference feature slice and its length, and `t2` unchanged.

  Raises:
    AssertionError: If a feature file is too short for the requested slice,
      or if `t2` falls outside [0, len2].
  """
  # tf.py_func hands string tensors to Python as `bytes`. Under Python 3,
  # '%s' % b'x' renders as "b'x'", which would corrupt the file path, so
  # decode every path component before formatting.
  data_dir = _to_text(data_dir)
  feat1 = np.load('%s/feat/v_%s.npy' % (data_dir, _to_text(v1)))
  feat2 = np.load('%s/feat/v_%s.npy' % (data_dir, _to_text(v2)))
  len1 = t1[1] - t1[0]
  len2 = l2[1] - l2[0]
  ret1 = feat1[t1[0]:t1[1]]
  ret2 = feat2[l2[0]:l2[1]]
  # Slicing silently truncates past the end of an array, so verify that the
  # requested number of rows was actually available.
  assert len1 == ret1.shape[0]
  assert len2 == ret2.shape[0]
  assert np.all(t2 >= 0) and np.all(t2 <= len2)
  return ret1, len1, ret2, len2, t2
def reorder_func(v1, l1, v2, l2, label):
  """Regroup a flat 5-tuple into a `(features, label)` pair.

  Args:
    v1: First feature component.
    l1: Length belonging to `v1`.
    v2: Second feature component.
    l2: Length belonging to `v2`.
    label: Target to be separated from the inputs.

  Returns:
    `((v1, l1, v2, l2), label)`.
  """
  # NOTE: no static shapes are set here. Earlier set_shape calls that pinned
  # the batch dimension to FLAGS.batch_size were disabled in this function.
  features = (v1, l1, v2, l2)
  return features, label