Skip to content

Commit 8de93a1

Browse files
[SYSTEMDS-3887] Change hyperparameter tuning strategy to Bayesian optimization
This patch changes the hyperparameter tuning in Scuro from grid search to Bayesian optimization.
1 parent 0e8e966 commit 8de93a1

File tree

17 files changed

+140
-87
lines changed

17 files changed

+140
-87
lines changed

.github/workflows/python.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,8 @@ jobs:
172172
gensim \
173173
opt-einsum \
174174
nltk \
175-
fvcore
175+
fvcore \
176+
scikit-optimize
176177
kill $KA
177178
cd src/main/python
178179
python -m unittest discover -s tests/scuro -p 'test_*.py' -v

src/main/python/systemds/scuro/dataloader/video_loader.py

Lines changed: 3 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -45,11 +45,6 @@ def __init__(
4545

4646
def extract(self, file: str, index: Optional[Union[str, List[str]]] = None):
4747
self.file_sanity_check(file)
48-
# if not self.load_data_from_file:
49-
# self.metadata[file] = self.modality_type.create_metadata(
50-
# 30, 10, 100, 100, 3
51-
# )
52-
# else:
5348
cap = cv2.VideoCapture(file)
5449

5550
if not cap.isOpened():
@@ -71,13 +66,7 @@ def extract(self, file: str, index: Optional[Union[str, List[str]]] = None):
7166
self.fps, length, width, height, num_channels
7267
)
7368

74-
num_frames = (length + frame_interval - 1) // frame_interval
75-
76-
stacked_frames = np.zeros(
77-
(num_frames, height, width, num_channels), dtype=self._data_type
78-
)
79-
80-
frame_idx = 0
69+
frames = []
8170
idx = 0
8271
while cap.isOpened():
8372
ret, frame = cap.read()
@@ -87,11 +76,7 @@ def extract(self, file: str, index: Optional[Union[str, List[str]]] = None):
8776
if idx % frame_interval == 0:
8877
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
8978
frame = frame.astype(self._data_type) / 255.0
90-
stacked_frames[frame_idx] = frame
91-
frame_idx += 1
79+
frames.append(frame)
9280
idx += 1
9381

94-
if frame_idx < num_frames:
95-
stacked_frames = stacked_frames[:frame_idx]
96-
97-
self.data.append(stacked_frames)
82+
self.data.append(np.stack(frames))

src/main/python/systemds/scuro/drsearch/hyperparameter_tuner.py

Lines changed: 54 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -19,16 +19,16 @@
1919
#
2020
# -------------------------------------------------------------
2121
from typing import Dict, List, Tuple, Any, Optional
22-
import numpy as np
23-
from sklearn.model_selection import ParameterGrid
22+
from skopt import gp_minimize
23+
from skopt.space import Real, Integer, Categorical
24+
from skopt.utils import use_named_args
2425
import json
2526
import logging
2627
from dataclasses import dataclass
2728
import time
2829
import copy
2930

3031
from systemds.scuro.modality.modality import Modality
31-
from systemds.scuro.drsearch.task import Task
3232

3333

3434
@dataclass
@@ -163,18 +163,64 @@ def visit_node(node_id):
163163
start_time = time.time()
164164
rep_name = "_".join([rep.__name__ for rep in reps])
165165

166-
param_grid = list(ParameterGrid(hyperparams))
167-
if max_evals and len(param_grid) > max_evals:
168-
np.random.shuffle(param_grid)
169-
param_grid = param_grid[:max_evals]
166+
search_space = []
167+
param_names = []
168+
for param_name, param_values in hyperparams.items():
169+
param_names.append(param_name)
170+
if isinstance(param_values, list):
171+
if all(isinstance(v, (int, float)) for v in param_values):
172+
if all(isinstance(v, int) for v in param_values):
173+
search_space.append(
174+
Integer(
175+
min(param_values), max(param_values), name=param_name
176+
)
177+
)
178+
else:
179+
search_space.append(
180+
Real(min(param_values), max(param_values), name=param_name)
181+
)
182+
else:
183+
search_space.append(Categorical(param_values, name=param_name))
184+
elif isinstance(param_values, tuple) and len(param_values) == 2:
185+
if isinstance(param_values[0], int) and isinstance(
186+
param_values[1], int
187+
):
188+
search_space.append(
189+
Integer(param_values[0], param_values[1], name=param_name)
190+
)
191+
else:
192+
search_space.append(
193+
Real(param_values[0], param_values[1], name=param_name)
194+
)
195+
else:
196+
search_space.append(Categorical([param_values], name=param_name))
197+
198+
n_calls = max_evals if max_evals else 50
170199

171200
all_results = []
172-
for params in param_grid:
201+
202+
@use_named_args(search_space)
203+
def objective(**params):
173204
result = self.evaluate_dag_config(
174205
dag, params, node_order, modality_ids, task
175206
)
176207
all_results.append(result)
177208

209+
score = result[1].average_scores[self.scoring_metric]
210+
if self.maximize_metric:
211+
return -score
212+
else:
213+
return score
214+
215+
result = gp_minimize(
216+
objective,
217+
search_space,
218+
n_calls=n_calls,
219+
random_state=42,
220+
verbose=self.debug,
221+
n_initial_points=min(10, n_calls // 2),
222+
)
223+
178224
if self.maximize_metric:
179225
best_params, best_score = max(
180226
all_results, key=lambda x: x[1].average_scores[self.scoring_metric]

src/main/python/systemds/scuro/modality/unimodal_modality.py

Lines changed: 16 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -156,26 +156,28 @@ def apply_representation(self, representation):
156156
if current_length < target_length:
157157
padding_needed = target_length - current_length
158158
if pad_dim_one:
159-
padding = np.zeros((embeddings.shape[0], padding_needed))
160-
padded_embeddings.append(
161-
np.concatenate((embeddings, padding), axis=1)
159+
padded = np.pad(
160+
embeddings,
161+
((0, 0), (0, padding_needed)),
162+
mode="constant",
163+
constant_values=0,
162164
)
165+
padded_embeddings.append(padded)
163166
else:
164167
if len(embeddings.shape) == 1:
165-
padded = np.zeros(
166-
embeddings.shape[0] + padding_needed,
167-
dtype=embeddings.dtype,
168+
padded = np.pad(
169+
embeddings,
170+
(0, padding_needed),
171+
mode="constant",
172+
constant_values=0,
168173
)
169-
padded[: embeddings.shape[0]] = embeddings
170174
else:
171-
padded = np.zeros(
172-
(
173-
embeddings.shape[0] + padding_needed,
174-
embeddings.shape[1],
175-
),
176-
dtype=embeddings.dtype,
175+
padded = np.pad(
176+
embeddings,
177+
((0, padding_needed), (0, 0)),
178+
mode="constant",
179+
constant_values=0,
177180
)
178-
padded[: embeddings.shape[0], :] = embeddings
179181
padded_embeddings.append(padded)
180182
else:
181183
padded_embeddings.append(embeddings)

src/main/python/systemds/scuro/representations/bow.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@
3232
@register_representation(ModalityType.TEXT)
3333
class BoW(UnimodalRepresentation):
3434
def __init__(self, ngram_range=2, min_df=2, output_file=None):
35-
parameters = {"ngram_range": [ngram_range], "min_df": [min_df]}
35+
parameters = {"ngram_range": [2, 3, 5, 10], "min_df": [1, 2, 4, 8]}
3636
super().__init__("BoW", ModalityType.EMBEDDING, parameters)
3737
self.ngram_range = int(ngram_range)
3838
self.min_df = int(min_df)

src/main/python/systemds/scuro/representations/clip.py

Lines changed: 21 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434
from systemds.scuro.utils.torch_dataset import CustomDataset
3535

3636

37-
@register_representation(ModalityType.VIDEO)
37+
@register_representation([ModalityType.VIDEO, ModalityType.IMAGE])
3838
class CLIPVisual(UnimodalRepresentation):
3939
def __init__(self, output_file=None):
4040
parameters = {}
@@ -46,8 +46,10 @@ def __init__(self, output_file=None):
4646
self.output_file = output_file
4747

4848
def transform(self, modality):
49-
transformed_modality = TransformedModality(modality, self)
50-
self.data_type = numpy_dtype_to_torch_dtype(modality.data_type)
49+
transformed_modality = TransformedModality(
50+
modality, self, self.output_modality_type
51+
)
52+
self.data_type = torch.float32
5153
if next(self.model.parameters()).dtype != self.data_type:
5254
self.model = self.model.to(self.data_type)
5355

@@ -60,14 +62,20 @@ def transform(self, modality):
6062
return transformed_modality
6163

6264
def create_visual_embeddings(self, modality):
63-
tf = transforms.Compose([transforms.ToPILImage(), transforms.ToTensor()])
65+
66+
clip_transform = transforms.Compose(
67+
[
68+
transforms.ToPILImage(),
69+
transforms.Resize(256),
70+
transforms.CenterCrop(224),
71+
transforms.ToTensor(),
72+
transforms.ConvertImageDtype(dtype=self.data_type),
73+
]
74+
)
6475
dataset = CustomDataset(
65-
modality.data,
66-
self.data_type,
67-
get_device(),
68-
(modality.metadata[0]["width"], modality.metadata[0]["height"]),
69-
tf=tf,
76+
modality.data, self.data_type, get_device(), tf=clip_transform
7077
)
78+
7179
embeddings = {}
7280
for instance in torch.utils.data.DataLoader(dataset):
7381
id = int(instance["id"][0])
@@ -94,7 +102,7 @@ def create_visual_embeddings(self, modality):
94102
.cpu()
95103
.float()
96104
.numpy()
97-
.astype(modality.data_type)
105+
.astype(np.float32)
98106
)
99107

100108
embeddings[id] = np.array(embeddings[id])
@@ -113,7 +121,9 @@ def __init__(self, output_file=None):
113121
self.output_file = output_file
114122

115123
def transform(self, modality):
116-
transformed_modality = TransformedModality(modality, self)
124+
transformed_modality = TransformedModality(
125+
modality, self, self.output_modality_type
126+
)
117127

118128
embeddings = self.create_text_embeddings(modality.data, self.model)
119129

src/main/python/systemds/scuro/representations/fusion.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -91,8 +91,8 @@ def transform_with_training(self, modalities: List[Modality], task):
9191
transformed_data = np.zeros(
9292
(len(modalities[0].data), transformed_train.shape[1])
9393
)
94-
transformed_data[task.train_indices] = transformed_train
95-
transformed_data[task.test_indices] = transformed_other
94+
transformed_data[fusion_train_indices] = transformed_train
95+
transformed_data[all_other_indices] = transformed_other
9696

9797
return transformed_data
9898

src/main/python/systemds/scuro/representations/lstm.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -188,11 +188,11 @@ def execute(self, modalities: List[Modality], labels: np.ndarray = None):
188188
criterion = nn.CrossEntropyLoss()
189189
optimizer = torch.optim.Adam(self.model.parameters(), lr=self.learning_rate)
190190

191-
X_tensor = torch.FloatTensor(X).to(device)
191+
X_tensor = torch.FloatTensor(X)
192192
if self.is_multilabel:
193-
y_tensor = torch.FloatTensor(y).to(device)
193+
y_tensor = torch.FloatTensor(y)
194194
else:
195-
y_tensor = torch.LongTensor(y).to(device)
195+
y_tensor = torch.LongTensor(y)
196196

197197
dataset = TensorDataset(X_tensor, y_tensor)
198198
dataloader = DataLoader(dataset, batch_size=self.batch_size, shuffle=True)
@@ -201,6 +201,8 @@ def execute(self, modalities: List[Modality], labels: np.ndarray = None):
201201
for epoch in range(self.epochs):
202202
total_loss = 0
203203
for batch_X, batch_y in dataloader:
204+
batch_X = batch_X.to(device)
205+
batch_y = batch_y.to(device)
204206
optimizer.zero_grad()
205207

206208
features, predictions = self.model(batch_X)
@@ -230,6 +232,7 @@ def execute(self, modalities: List[Modality], labels: np.ndarray = None):
230232
TensorDataset(X_tensor), batch_size=self.batch_size, shuffle=False
231233
)
232234
for (batch_X,) in inference_dataloader:
235+
batch_X = batch_X.to(device)
233236
features, _ = self.model(batch_X)
234237
all_features.append(features.cpu())
235238

@@ -244,14 +247,15 @@ def apply_representation(self, modalities: List[Modality]) -> np.ndarray:
244247
device = get_device()
245248
self.model.to(device)
246249

247-
X_tensor = torch.FloatTensor(X).to(device)
250+
X_tensor = torch.FloatTensor(X)
248251
all_features = []
249252
self.model.eval()
250253
with torch.no_grad():
251254
inference_dataloader = DataLoader(
252255
TensorDataset(X_tensor), batch_size=self.batch_size, shuffle=False
253256
)
254257
for (batch_X,) in inference_dataloader:
258+
batch_X = batch_X.to(device)
255259
features, _ = self.model(batch_X)
256260
all_features.append(features.cpu())
257261

src/main/python/systemds/scuro/representations/multimodal_attention_fusion.py

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -162,12 +162,12 @@ def execute(self, modalities: List[Modality], labels: np.ndarray = None):
162162
)
163163

164164
for modality_name in inputs:
165-
inputs[modality_name] = inputs[modality_name].to(device)
165+
inputs[modality_name] = inputs[modality_name]
166166

167167
if self.is_multilabel:
168-
labels_tensor = torch.from_numpy(y).float().to(device)
168+
labels_tensor = torch.from_numpy(y).float()
169169
else:
170-
labels_tensor = torch.from_numpy(y).long().to(device)
170+
labels_tensor = torch.from_numpy(y).long()
171171

172172
dataset_inputs = []
173173
for i in range(len(y)):
@@ -199,9 +199,9 @@ def execute(self, modalities: List[Modality], labels: np.ndarray = None):
199199
for modality_name in batch_inputs:
200200
batch_inputs[modality_name] = torch.stack(
201201
batch_inputs[modality_name]
202-
)
202+
).to(device)
203203

204-
batch_labels = torch.stack(batch_labels)
204+
batch_labels = torch.stack(batch_labels).to(device)
205205

206206
optimizer.zero_grad()
207207

@@ -250,7 +250,9 @@ def execute(self, modalities: List[Modality], labels: np.ndarray = None):
250250

251251
batch_inputs = {}
252252
for modality_name, tensor in inputs.items():
253-
batch_inputs[modality_name] = tensor[batch_start:batch_end]
253+
batch_inputs[modality_name] = tensor[batch_start:batch_end].to(
254+
device
255+
)
254256

255257
encoder_output = self.encoder(batch_inputs)
256258
all_features.append(encoder_output["fused"].cpu())
@@ -266,9 +268,6 @@ def apply_representation(self, modalities: List[Modality]) -> np.ndarray:
266268
device = get_device()
267269
self.encoder.to(device)
268270

269-
for modality_name in inputs:
270-
inputs[modality_name] = inputs[modality_name].to(device)
271-
272271
self.encoder.eval()
273272
all_features = []
274273

@@ -281,7 +280,9 @@ def apply_representation(self, modalities: List[Modality]) -> np.ndarray:
281280

282281
batch_inputs = {}
283282
for modality_name, tensor in inputs.items():
284-
batch_inputs[modality_name] = tensor[batch_start:batch_end]
283+
batch_inputs[modality_name] = tensor[batch_start:batch_end].to(
284+
device
285+
)
285286

286287
encoder_output = self.encoder(batch_inputs)
287288
all_features.append(encoder_output["fused"].cpu())

src/main/python/systemds/scuro/representations/resnet.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -163,7 +163,7 @@ def hook(
163163
.cpu()
164164
.float()
165165
.numpy()
166-
.astype(modality.data_type)
166+
.astype(np.float32)
167167
)
168168

169169
embeddings[video_id] = np.array(embeddings[video_id])

0 commit comments

Comments (0)