Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion src/main/python/systemds/scuro/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,11 @@
from systemds.scuro.representations.tfidf import TfIdf
from systemds.scuro.representations.unimodal import UnimodalRepresentation
from systemds.scuro.representations.wav2vec import Wav2Vec
from systemds.scuro.representations.window_aggregation import WindowAggregation
from systemds.scuro.representations.window_aggregation import (
WindowAggregation,
DynamicWindow,
StaticWindow,
)
from systemds.scuro.representations.word2vec import W2V
from systemds.scuro.representations.x3d import X3D
from systemds.scuro.models.model import Model
Expand Down Expand Up @@ -145,4 +149,6 @@
"RMSE",
"Spectral",
"AttentionFusion",
"DynamicWindow",
"StaticWindow",
]
4 changes: 2 additions & 2 deletions src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,8 +122,8 @@ def _process_modality(self, modality, parallel):

for context_operator_after in context_operators:
con_op_after = context_operator_after()
mod = mod.context(con_op_after)
self._evaluate_local(mod, [mod_op, con_op_after], local_results)
mod_con = mod.context(con_op_after)
self._evaluate_local(mod_con, [mod_op, con_op_after], local_results)

return local_results

Expand Down
2 changes: 2 additions & 0 deletions src/main/python/systemds/scuro/representations/fusion.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,8 @@ def get_max_embedding_size(self, modalities: List[Modality]):
curr_shape = modalities[idx].data[0].shape
if len(modalities[idx - 1].data) != len(modalities[idx].data):
raise f"Modality sizes don't match!"
elif len(curr_shape) == 1:
continue
elif curr_shape[1] > max_size:
max_size = curr_shape[1]

Expand Down
107 changes: 90 additions & 17 deletions src/main/python/systemds/scuro/representations/window_aggregation.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
# under the License.
#
# -------------------------------------------------------------
import copy

import numpy as np
import math

Expand All @@ -28,17 +30,13 @@
from systemds.scuro.representations.context import Context


@register_context_operator()
class WindowAggregation(Context):
def __init__(self, window_size=10, aggregation_function="mean", pad=True):
class Window(Context):
def __init__(self, name, aggregation_function):
parameters = {
"window_size": [window_size],
"aggregation_function": list(Aggregation().get_aggregation_functions()),
} # TODO: window_size should be dynamic and adapted to the shape of the data
super().__init__("WindowAggregation", parameters)
self.window_size = window_size
}
super().__init__(name, parameters)
self.aggregation_function = aggregation_function
self.pad = pad

@property
def aggregation_function(self):
Expand All @@ -48,6 +46,15 @@ def aggregation_function(self):
def aggregation_function(self, value):
self._aggregation_function = Aggregation(value)


@register_context_operator()
class WindowAggregation(Window):
def __init__(self, window_size=10, aggregation_function="mean", pad=False):
super().__init__("WindowAggregation", aggregation_function)
self.parameters["window_size"] = [window_size]
self.window_size = window_size
self.pad = pad

def execute(self, modality):
windowed_data = []
original_lengths = []
Expand Down Expand Up @@ -107,24 +114,90 @@ def execute(self, modality):
def window_aggregate_single_level(self, instance, new_length):
if isinstance(instance, str):
return instance
instance = np.array(instance)
num_cols = instance.shape[1] if instance.ndim > 1 else 1
result = np.empty((new_length, num_cols))
instance = np.array(copy.deepcopy(instance))

result = []
for i in range(0, new_length):
result[i] = self.aggregation_function.aggregate_instance(
instance[i * self.window_size : i * self.window_size + self.window_size]
result.append(
self.aggregation_function.aggregate_instance(
instance[
i * self.window_size : i * self.window_size + self.window_size
]
)
)

if num_cols == 1:
result = result.reshape(-1)
return result
return np.array(result)

def window_aggregate_nested_level(self, instance, new_length):
result = [[] for _ in range(0, new_length)]
data = np.stack(instance)
data = np.stack(copy.deepcopy(instance))
for i in range(0, new_length):
result[i] = self.aggregation_function.aggregate_instance(
data[i * self.window_size : i * self.window_size + self.window_size]
)

return np.array(result)


@register_context_operator()
class StaticWindow(Window):
def __init__(self, num_windows=100, aggregation_function="mean"):
super().__init__("StaticWindow", aggregation_function)
self.parameters["num_windows"] = [num_windows]
self.num_windows = num_windows

def execute(self, modality):
windowed_data = []

for instance in modality.data:
window_size = len(instance) // self.num_windows
remainder = len(instance) % self.num_windows
output = []
start = 0
for i in range(0, self.num_windows):
extra = 1 if i < remainder else 0
end = start + window_size + extra
window = copy.deepcopy(instance[start:end])
val = (
self.aggregation_function.aggregate_instance(window)
if len(window) > 0
else np.zeros_like(output[i - 1])
)
output.append(val)
start = end

windowed_data.append(output)
return np.array(windowed_data)


@register_context_operator()
class DynamicWindow(Window):
def __init__(self, num_windows=100, aggregation_function="mean"):
super().__init__("DynamicWindow", aggregation_function)
self.parameters["num_windows"] = [num_windows]
self.num_windows = num_windows

def execute(self, modality):
windowed_data = []

for instance in modality.data:
N = len(instance)
weights = np.geomspace(4, 256, num=self.num_windows)
weights = weights / np.sum(weights)
window_sizes = (weights * N).astype(int)
window_sizes[-1] += N - np.sum(window_sizes)
indices = np.cumsum(window_sizes)
output = []
start = 0
for end in indices:
window = copy.deepcopy(instance[start:end])
val = (
self.aggregation_function.aggregate_instance(window)
if len(window) > 0
else np.zeros_like(instance[0])
)
output.append(val)
start = end
windowed_data.append(output)

return np.array(windowed_data)
12 changes: 10 additions & 2 deletions src/main/python/tests/scuro/test_operator_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,11 @@
from systemds.scuro.representations.mfcc import MFCC
from systemds.scuro.representations.swin_video_transformer import SwinVideoTransformer
from systemds.scuro.representations.wav2vec import Wav2Vec
from systemds.scuro.representations.window_aggregation import WindowAggregation
from systemds.scuro.representations.window_aggregation import (
WindowAggregation,
StaticWindow,
DynamicWindow,
)
from systemds.scuro.representations.bow import BoW
from systemds.scuro.representations.word2vec import W2V
from systemds.scuro.representations.tfidf import TfIdf
Expand Down Expand Up @@ -83,7 +87,11 @@ def test_text_representations_in_registry(self):

def test_context_operator_in_registry(self):
registry = Registry()
assert registry.get_context_operators() == [WindowAggregation]
assert registry.get_context_operators() == [
WindowAggregation,
StaticWindow,
DynamicWindow,
]

# def test_fusion_operator_in_registry(self):
# registry = Registry()
Expand Down
2 changes: 1 addition & 1 deletion src/main/python/tests/scuro/test_unimodal_optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ def setUpClass(cls):

def test_unimodal_optimizer_for_audio_modality(self):
audio_data, audio_md = ModalityRandomDataGenerator().create_audio_data(
self.num_instances, 100
self.num_instances, 3000
)
audio = UnimodalModality(
TestDataLoader(
Expand Down
53 changes: 47 additions & 6 deletions src/main/python/tests/scuro/test_window_operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,13 @@

import numpy as np

from tests.scuro.data_generator import ModalityRandomDataGenerator
from tests.scuro.data_generator import ModalityRandomDataGenerator, TestDataLoader
from systemds.scuro.modality.type import ModalityType
from systemds.scuro.modality.unimodal_modality import UnimodalModality
from systemds.scuro.representations.window_aggregation import (
StaticWindow,
DynamicWindow,
)


class TestWindowOperations(unittest.TestCase):
Expand All @@ -35,20 +40,56 @@ def setUpClass(cls):
cls.data_generator = ModalityRandomDataGenerator()
cls.aggregations = ["mean", "sum", "max", "min"]

def test_window_operations_on_audio_representations(self):
def test_static_window(self):
num_windows = 5
data, md = self.data_generator.create_visual_modality(self.num_instances, 50)
modality = UnimodalModality(
TestDataLoader(
[i for i in range(0, self.num_instances)],
None,
ModalityType.VIDEO,
data,
np.float32,
md,
)
)
aggregated_window = modality.context(StaticWindow(num_windows))

for i in range(0, self.num_instances):
assert len(aggregated_window.data[i]) == num_windows

def test_dynamic_window(self):
num_windows = 5
data, md = self.data_generator.create_visual_modality(self.num_instances, 50)
modality = UnimodalModality(
TestDataLoader(
[i for i in range(0, self.num_instances)],
None,
ModalityType.VIDEO,
data,
np.float32,
md,
)
)
aggregated_window = modality.context(DynamicWindow(num_windows))

for i in range(0, self.num_instances):
assert len(aggregated_window.data[i]) == num_windows

def test_window_aggregation_on_audio_representations(self):
window_size = 10
self.run_window_operations_for_modality(ModalityType.AUDIO, window_size)
self.run_window_aggregation_for_modality(ModalityType.AUDIO, window_size)

def test_window_operations_on_video_representations(self):
window_size = 10
self.run_window_operations_for_modality(ModalityType.VIDEO, window_size)
self.run_window_aggregation_for_modality(ModalityType.VIDEO, window_size)

def test_window_operations_on_text_representations(self):
window_size = 10

self.run_window_operations_for_modality(ModalityType.TEXT, window_size)
self.run_window_aggregation_for_modality(ModalityType.TEXT, window_size)

def run_window_operations_for_modality(self, modality_type, window_size):
def run_window_aggregation_for_modality(self, modality_type, window_size):
r = self.data_generator.create1DModality(40, 100, modality_type)
for aggregation in self.aggregations:
windowed_modality = r.window_aggregation(window_size, aggregation)
Expand Down
Loading