Skip to content
Merged
14 changes: 14 additions & 0 deletions src/main/python/systemds/scuro/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@
from systemds.scuro.representations.lstm import LSTM
from systemds.scuro.representations.max import RowMax
from systemds.scuro.representations.mel_spectrogram import MelSpectrogram
from systemds.scuro.representations.multimodal_attention_fusion import (
AttentionFusion,
)
from systemds.scuro.representations.mfcc import MFCC
from systemds.scuro.representations.hadamard import Hadamard
from systemds.scuro.representations.optical_flow import OpticalFlow
Expand Down Expand Up @@ -73,6 +76,12 @@
from systemds.scuro.drsearch.unimodal_representation_optimizer import (
UnimodalRepresentationOptimizer,
)
from systemds.scuro.representations.covarep_audio_features import (
RMSE,
Spectral,
ZeroCrossing,
Pitch,
)
from systemds.scuro.drsearch.multimodal_optimizer import MultimodalOptimizer
from systemds.scuro.drsearch.unimodal_optimizer import UnimodalOptimizer

Expand Down Expand Up @@ -131,4 +140,9 @@
"UnimodalRepresentationOptimizer",
"UnimodalOptimizer",
"MultimodalOptimizer",
"ZeroCrossing",
"Pitch",
"RMSE",
"Spectral",
"AttentionFusion",
]
1 change: 0 additions & 1 deletion src/main/python/systemds/scuro/dataloader/audio_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
#
# -------------------------------------------------------------
from typing import List, Optional, Union

import librosa
import numpy as np

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,9 @@ def optimize_intramodal_representations(self, task):
],
)

# TODO: check if order matters for reused reps - only compute once - check in cache
# TODO: parallelize - whenever an item of len 0 comes along give it to a new thread - merge results
# TODO: change the algorithm so that one representation is used until there is no more representations to add - saves a lot of memory
def optimize_intermodal_representations(self, task):
modality_combos = []
n = len(self.k_best_cache[task.model.name])
Expand All @@ -122,8 +125,7 @@ def generate_extensions(current_combo, remaining_indices):
reuse_fused_representations = False
for i, modality_combo in enumerate(modality_combos):
# clear reuse cache
if i % 5 == 0:
reuse_cache = self.prune_cache(modality_combos[i:], reuse_cache)
reuse_cache = self.prune_cache(modality_combos[i:], reuse_cache)

if i != 0:
reuse_fused_representations = self.is_prefix_match(
Expand Down
13 changes: 13 additions & 0 deletions src/main/python/systemds/scuro/drsearch/operator_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,12 @@ def __new__(cls):
cls._representations[m_type] = []
return cls._instance

def set_fusion_operators(self, fusion_operators):
if isinstance(fusion_operators, list):
self._context_operators = fusion_operators
else:
self._fusion_operators = [fusion_operators]

def add_representation(
self, representation: Representation, modality: ModalityType
):
Expand All @@ -57,6 +63,13 @@ def add_fusion_operator(self, fusion_operator):
def get_representations(self, modality: ModalityType):
return self._representations[modality]

def get_not_self_contained_representations(self, modality: ModalityType):
reps = []
for rep in self.get_representations(modality):
if not rep().self_contained:
reps.append(rep)
return reps

def get_context_operators(self):
# TODO: return modality specific context operations
return self._context_operators
Expand Down
73 changes: 59 additions & 14 deletions src/main/python/systemds/scuro/drsearch/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,10 @@
#
# -------------------------------------------------------------
import time
from typing import List
from typing import List, Union

from systemds.scuro.modality.modality import Modality
from systemds.scuro.representations.representation import Representation
from systemds.scuro.models.model import Model
import numpy as np
from sklearn.model_selection import KFold
Expand Down Expand Up @@ -57,6 +59,8 @@ def __init__(
self.inference_time = []
self.training_time = []
self.expected_dim = 1
self.train_scores = []
self.val_scores = []

def get_train_test_split(self, data):
X_train = [data[i] for i in self.train_indices]
Expand All @@ -73,28 +77,69 @@ def run(self, data):
:param data: The aligned data used in the prediction process
:return: the validation accuracy
"""
self._reset_params()
skf = KFold(n_splits=self.kfold, shuffle=True, random_state=11)

fold = 0
X, y, _, _ = self.get_train_test_split(data)

for train, test in skf.split(X, y):
train_X = np.array(X)[train]
train_y = np.array(y)[train]
test_X = np.array(X)[test]
test_y = np.array(y)[test]
self._run_fold(train_X, train_y, test_X, test_y)
fold += 1

if self.measure_performance:
self.inference_time = np.mean(self.inference_time)
self.training_time = np.mean(self.training_time)

return [np.mean(self.train_scores), np.mean(self.val_scores)]

def _reset_params(self):
self.inference_time = []
self.training_time = []
self.train_scores = []
self.val_scores = []

def _run_fold(self, train_X, train_y, test_X, test_y):
train_start = time.time()
train_score = self.model.fit(train_X, train_y, test_X, test_y)
train_end = time.time()
self.training_time.append(train_end - train_start)
self.train_scores.append(train_score)
test_start = time.time()
test_score = self.model.test(np.array(test_X), test_y)
test_end = time.time()
self.inference_time.append(test_end - test_start)
self.val_scores.append(test_score)

def create_representation_and_run(
self,
representation: Representation,
modalities: Union[List[Modality], Modality],
):
self._reset_params()
skf = KFold(n_splits=self.kfold, shuffle=True, random_state=11)
train_scores = []
test_scores = []

fold = 0
X, y, X_test, y_test = self.get_train_test_split(data)
X, y, _, _ = self.get_train_test_split(data)

for train, test in skf.split(X, y):
train_X = np.array(X)[train]
train_y = np.array(y)[train]
train_start = time.time()
train_score = self.model.fit(train_X, train_y, X_test, y_test)
train_end = time.time()
self.training_time.append(train_end - train_start)
train_scores.append(train_score)
test_start = time.time()
test_score = self.model.test(np.array(X_test), y_test)
test_end = time.time()
self.inference_time.append(test_end - test_start)
test_scores.append(test_score)
test_X = s.transform(np.array(X)[test])
test_y = np.array(y)[test]

if isinstance(modalities, Modality):
rep = modality.apply_representation(representation())
else:
representation().transform(
train_X, train_y
) # TODO: think about a way how to handle masks

self._run_fold(train_X, train_y, test_X, test_y)
fold += 1

if self.measure_performance:
Expand Down
104 changes: 68 additions & 36 deletions src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
# -------------------------------------------------------------
import pickle
import time
import copy
from concurrent.futures import ProcessPoolExecutor, as_completed
from dataclasses import dataclass, field, asdict

Expand All @@ -28,11 +29,17 @@

import numpy as np
from systemds.scuro.representations.window_aggregation import WindowAggregation
from systemds.scuro.representations.concatenation import Concatenation
from systemds.scuro.representations.hadamard import Hadamard
from systemds.scuro.representations.sum import Sum

from systemds.scuro.representations.aggregated_representation import (
AggregatedRepresentation,
)
from systemds.scuro import ModalityType, Aggregation
from systemds.scuro.modality.type import ModalityType
from systemds.scuro.modality.modality import Modality
from systemds.scuro.modality.transformed import TransformedModality
from systemds.scuro.representations.aggregate import Aggregation
from systemds.scuro.drsearch.operator_registry import Registry
from systemds.scuro.utils.schema_helpers import get_shape

Expand Down Expand Up @@ -84,7 +91,6 @@ def optimize_parallel(self, n_workers=None):
def optimize(self):
for modality in self.modalities:
local_result = self._process_modality(modality, False)
# self._merge_results(local_result)

def _process_modality(self, modality, parallel):
if parallel:
Expand All @@ -95,43 +101,59 @@ def _process_modality(self, modality, parallel):
local_results = self.operator_performance

context_operators = self.operator_registry.get_context_operators()

for context_operator in context_operators:
context_representation = None
if (
modality.modality_type != ModalityType.TEXT
and modality.modality_type != ModalityType.VIDEO
):
con_op = context_operator()
context_representation = modality.context(con_op)
self._evaluate_local(context_representation, [con_op], local_results)

modality_specific_operators = self.operator_registry.get_representations(
not_self_contained_reps = (
self.operator_registry.get_not_self_contained_representations(
modality.modality_type
)
for modality_specific_operator in modality_specific_operators:
mod_context = None
mod_op = modality_specific_operator()
if context_representation is not None:
mod_context = context_representation.apply_representation(mod_op)
self._evaluate_local(mod_context, [con_op, mod_op], local_results)

mod = modality.apply_representation(mod_op)
self._evaluate_local(mod, [mod_op], local_results)

for context_operator_after in context_operators:
con_op_after = context_operator_after()
if mod_context is not None:
mod_context = mod_context.context(con_op_after)
self._evaluate_local(
mod_context, [con_op, mod_op, con_op_after], local_results
)

mod = mod.context(con_op_after)
self._evaluate_local(mod, [mod_op, con_op_after], local_results)
)
modality_specific_operators = self.operator_registry.get_representations(
modality.modality_type
)
for modality_specific_operator in modality_specific_operators:
mod_op = modality_specific_operator()

mod = modality.apply_representation(mod_op)
self._evaluate_local(mod, [mod_op], local_results)

if not mod_op.self_contained:
self._combine_non_self_contained_representations(
modality, mod, not_self_contained_reps, local_results
)

for context_operator_after in context_operators:
con_op_after = context_operator_after()
mod = mod.context(con_op_after)
self._evaluate_local(mod, [mod_op, con_op_after], local_results)

return local_results

def _combine_non_self_contained_representations(
self,
modality: Modality,
representation: TransformedModality,
other_representations,
local_results,
):
combined = representation
context_operators = self.operator_registry.get_context_operators()
used_representations = representation.transformation
for other_representation in other_representations:
used_representations.append(other_representation())
for combination in [Concatenation(), Hadamard(), Sum()]:
combined = combined.combine(
modality.apply_representation(other_representation()), combination
)
self._evaluate_local(
combined, used_representations, local_results, combination
)

for context_op in context_operators:
con_op = context_op()
mod = combined.context(con_op)
c_t = copy.deepcopy(used_representations)
c_t.append(con_op)
self._evaluate_local(mod, c_t, local_results, combination)

def _merge_results(self, local_results):
"""Merge local results into the main results"""
for modality_id in local_results.results:
Expand All @@ -145,7 +167,9 @@ def _merge_results(self, local_results):
for key, value in local_results.cache[modality][task_name].items():
self.operator_performance.cache[modality][task_name][key] = value

def _evaluate_local(self, modality, representations, local_results):
def _evaluate_local(
self, modality, representations, local_results, combination=None
):
if self._tasks_require_same_dims:
if self.expected_dimensions == 1 and get_shape(modality.metadata) > 1:
# for aggregation in Aggregation().get_aggregation_functions():
Expand All @@ -165,6 +189,7 @@ def _evaluate_local(self, modality, representations, local_results):
modality,
task.model.name,
end - start,
combination,
)
else:
modality.pad()
Expand All @@ -178,6 +203,7 @@ def _evaluate_local(self, modality, representations, local_results):
modality,
task.model.name,
end - start,
combination,
)
else:
for task in self.tasks:
Expand All @@ -198,6 +224,7 @@ def _evaluate_local(self, modality, representations, local_results):
modality,
task.model.name,
end - start,
combination,
)
else:
# modality.pad()
Expand All @@ -210,6 +237,7 @@ def _evaluate_local(self, modality, representations, local_results):
modality,
task.model.name,
end - start,
combination,
)


Expand All @@ -228,7 +256,9 @@ def __init__(self, modalities, tasks, debug=False):
self.cache[modality][task_name] = {}
self.results[modality][task_name] = []

def add_result(self, scores, representations, modality, task_name, task_time):
def add_result(
self, scores, representations, modality, task_name, task_time, combination
):
parameters = []
representation_names = []

Expand Down Expand Up @@ -256,6 +286,7 @@ def add_result(self, scores, representations, modality, task_name, task_time):
val_score=scores[1],
representation_time=modality.transform_time,
task_time=task_time,
combination=combination.name if combination else "",
)
self.results[modality.modality_id][task_name].append(entry)
self.cache[modality.modality_id][task_name][
Expand Down Expand Up @@ -302,3 +333,4 @@ class ResultEntry:
train_score: float
representation_time: float
task_time: float
combination: str
2 changes: 1 addition & 1 deletion src/main/python/systemds/scuro/modality/modality.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,6 @@ def is_aligned(self, other_modality):
!= list(other_modality.metadata.values())[i]["data_layout"]["shape"]
):
aligned = False
continue
break

return aligned
Loading
Loading