From 21352eeec181b1ce095b62762cd41ff1388f2659 Mon Sep 17 00:00:00 2001 From: Christina Dionysio Date: Mon, 28 Jul 2025 14:13:02 +0200 Subject: [PATCH 01/19] add imporved unimodal optimizer --- .../scuro/drsearch/unimodal_optimizer.py | 119 ++++++++++++++++++ .../systemds/scuro/utils/schema_helpers.py | 4 + 2 files changed, 123 insertions(+) create mode 100644 src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py diff --git a/src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py b/src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py new file mode 100644 index 00000000000..24839088df8 --- /dev/null +++ b/src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py @@ -0,0 +1,119 @@ +# ------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# +# ------------------------------------------------------------- +from build.lib.systemds.scuro.representations.aggregated_representation import AggregatedRepresentation +from systemds.scuro import ModalityType, Aggregation +from systemds.scuro.drsearch.operator_registry import Registry +from systemds.scuro.utils.schema_helpers import get_shape + + +class UnimodalOptimizer: + def __init__(self, modalities, tasks): + self.modalities = modalities + self.tasks = tasks + + self.operator_registry = Registry() + self.operator_performance = {} + + for modality in self.modalities: + self.operator_performance[modality.modality_id] = {} + for task in tasks: + self.operator_performance[modality.modality_id][task.model.name] = UnimodalResults(modality.modality_id, task.name) + + + def get_k_best_results(self, modality, k, task): + """ + Get the k best results for the given modality + :param modality: modality to get the best results for + :param k: number of best results + """ + + results = self.operator_performance[modality.modality_id][task.model.name].get_k_best_results(k) + + return results + + def optimize(self): + for modality in self.modalities: + context_operators = self.operator_registry.get_context_operators() + + for context_operator in context_operators: + context_representation = None + if modality.modality_type != ModalityType.TEXT: + con_op = context_operator() + context_representation = modality.context(con_op) + self.evaluate(context_representation, [context_operator.__name__], [con_op.parameters]) + + modality_specific_operators = self.operator_registry.get_representations(modality.modality_type) + for modality_specific_operator in modality_specific_operators: + mod_context = None + mod_op = modality_specific_operator() + if context_representation is not None: + mod_context = context_representation.apply_representation(mod_op) + self.evaluate(mod_context, [context_operator.__name__, modality_specific_operator.__name__], [con_op.parameters, 
mod_op.parameters]) + + + mod = modality.apply_representation(mod_op) + self.evaluate(mod, [modality_specific_operator.__name__], + [mod_op.parameters]) + + for context_operator_after in context_operators: + con_op_after = context_operator_after() + if mod_context is not None: + mod_context = mod_context.context(con_op_after) + self.evaluate(mod_context, + [context_operator.__name__, modality_specific_operator.__name__, context_operator_after.__name__], + [con_op.parameters, mod_op.parameters, con_op_after.parameters]) + + mod = mod.context(con_op_after) + self.evaluate(mod, [modality_specific_operator.__name__, context_operator_after.__name__], + [mod_op.parameters, con_op_after.parameters]) + + def evaluate(self, modality, representation_names, params): + for task in self.tasks: + if task.expected_dim == 1 and get_shape(modality.metadata) > 1: + for aggregation in Aggregation().get_aggregation_functions(): + # padding should not be necessary here + agg_operator = AggregatedRepresentation(Aggregation(aggregation, False)) + agg_modality = agg_operator.transform(modality) + + scores = task.run(agg_modality.data) + rep_names = representation_names.copy() + rep_names.append(agg_operator.name) + + rep_params = params.copy() + rep_params.append(agg_operator.parameters) + self.operator_performance[modality.modality_id][task.model.name].add_result(scores, rep_names, rep_params) + else: + scores = task.run(modality.data) + self.operator_performance[modality.modality_id][task.model.name].add_result(scores, representation_names, params) + + +class UnimodalResults: + def __init__(self, modality_id, task_name): + self.modality_id = modality_id + self.task_name = task_name + self.results = {'representations': [], 'params': [], 'train_score': [], 'val_score':[]} + + def add_result(self, scores, representations, params): + self.results['representations'].append(representations) + self.results['params'].append([param.copy() if param is not None else param for param in params ]) + 
self.results['train_score'].append(scores[0]) + self.results['val_score'].append(scores[1]) + \ No newline at end of file diff --git a/src/main/python/systemds/scuro/utils/schema_helpers.py b/src/main/python/systemds/scuro/utils/schema_helpers.py index 28af476cca4..3d1fbf4d71a 100644 --- a/src/main/python/systemds/scuro/utils/schema_helpers.py +++ b/src/main/python/systemds/scuro/utils/schema_helpers.py @@ -40,3 +40,7 @@ def calculate_new_frequency(new_length, old_length, old_frequency): duration = old_length / old_frequency new_frequency = new_length / duration return new_frequency + + +def get_shape(metadata): + return len(list(metadata.values())[0]["data_layout"]["shape"]) From faa31a2bcd9f86d9a15dfd4cc2de9fc3e51ba94f Mon Sep 17 00:00:00 2001 From: Christina Dionysio Date: Mon, 28 Jul 2025 14:25:39 +0200 Subject: [PATCH 02/19] add subclass to store results --- .../tests/scuro/test_unimodal_optimizer.py | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/src/main/python/tests/scuro/test_unimodal_optimizer.py b/src/main/python/tests/scuro/test_unimodal_optimizer.py index 9ed034e5fe8..41bd2af1367 100644 --- a/src/main/python/tests/scuro/test_unimodal_optimizer.py +++ b/src/main/python/tests/scuro/test_unimodal_optimizer.py @@ -20,7 +20,6 @@ # ------------------------------------------------------------- -import shutil import unittest import numpy as np @@ -31,8 +30,8 @@ from systemds.scuro.drsearch.operator_registry import Registry from systemds.scuro.models.model import Model from systemds.scuro.drsearch.task import Task -from systemds.scuro.drsearch.unimodal_representation_optimizer import ( - UnimodalRepresentationOptimizer, +from systemds.scuro.drsearch.unimodal_optimizer import ( + UnimodalOptimizer, ) from systemds.scuro.representations.spectrogram import Spectrogram @@ -41,9 +40,6 @@ from systemds.scuro.representations.resnet import ResNet from tests.scuro.data_generator import ModalityRandomDataGenerator, TestDataLoader 
-from systemds.scuro.dataloader.audio_loader import AudioLoader -from systemds.scuro.dataloader.video_loader import VideoLoader -from systemds.scuro.dataloader.text_loader import TextLoader from systemds.scuro.modality.type import ModalityType @@ -186,21 +182,21 @@ def optimize_unimodal_representation_for_modality(self, modality): ): registry = Registry() - unimodal_optimizer = UnimodalRepresentationOptimizer( - [modality], self.tasks, max_chain_depth=2 + unimodal_optimizer = UnimodalOptimizer( + [modality], self.tasks ) unimodal_optimizer.optimize() assert ( - list(unimodal_optimizer.optimization_results.keys())[0] + list(unimodal_optimizer.operator_performance.keys())[0] == modality.modality_id ) - assert len(list(unimodal_optimizer.optimization_results.values())[0]) == 2 + assert len(list(unimodal_optimizer.operator_performance.values())[0]) == 2 assert ( len( unimodal_optimizer.get_k_best_results(modality, 1, self.tasks[0])[ 0 - ].operator_chain + ].representations ) >= 1 ) From 1b6a2ed265428d812312e9ac3d87489d3cc78afa Mon Sep 17 00:00:00 2001 From: Christina Dionysio Date: Tue, 29 Jul 2025 10:30:57 +0200 Subject: [PATCH 03/19] add params as strings --- .../scuro/drsearch/multimodal_optimizer.py | 103 +++++++++++++ .../scuro/drsearch/operator_registry.py | 12 ++ .../scuro/drsearch/unimodal_optimizer.py | 136 ++++++++++++------ 3 files changed, 205 insertions(+), 46 deletions(-) create mode 100644 src/main/python/systemds/scuro/drsearch/multimodal_optimizer.py diff --git a/src/main/python/systemds/scuro/drsearch/multimodal_optimizer.py b/src/main/python/systemds/scuro/drsearch/multimodal_optimizer.py new file mode 100644 index 00000000000..6f064f896c5 --- /dev/null +++ b/src/main/python/systemds/scuro/drsearch/multimodal_optimizer.py @@ -0,0 +1,103 @@ +from systemds.scuro.representations.aggregated_representation import ( + AggregatedRepresentation, +) + +from systemds.scuro.representations.aggregate import Aggregation + +from 
systemds.scuro.drsearch.operator_registry import Registry + +from systemds.scuro.utils.schema_helpers import get_shape +import dataclasses + + +class MultimodalOptimizer: + def __init__(self, modalities, unimodal_optimizer, tasks, k=2): + self.k_best_modalities = None + self.modalities = modalities + self.unimodal_optimizer = unimodal_optimizer + self.tasks = tasks + self.k = k + self.extract_k_best_modalities_per_task() + self.operator_registry = Registry() + self.optimization_results = {} + + def optimize(self): + for task in self.tasks: + for modality in self.modalities: + representations = self.k_best_modalities[task][modality.modality_id] + applied_representations = [] + for i in range(0, len(representations)): + applied_representation = modality + for j, rep in enumerate(representations[i].representations): + representation, is_context = ( + self.operator_registry.get_representation_by_name( + rep, modality.modality_type + ) + ) + if representation is None: + if rep == AggregatedRepresentation.__name__: + representation = AggregatedRepresentation(Aggregation()) + else: + representation = representation() + representation.set_parameters(representations[i].params[j]) + if is_context: + applied_representation = applied_representation.context( + representation + ) + else: + applied_representation = ( + applied_representation.apply_representation( + representation + ) + ) + applied_representations.append(applied_representation) + + def evaluate(self, task, modality, representation_names, params): + if task.expected_dim == 1 and get_shape(modality.metadata) > 1: + for aggregation in Aggregation().get_aggregation_functions(): + # padding should not be necessary here + agg_operator = AggregatedRepresentation(Aggregation(aggregation, False)) + agg_modality = agg_operator.transform(modality) + + scores = task.run(agg_modality.data) + rep_names = representation_names.copy() + rep_names.append(agg_operator.name) + + rep_params = params.copy() + 
rep_params.append(agg_operator.parameters) + self.optimization_results[modality.modality_id][ + task.model.name + ].add_result(scores, rep_names, rep_params) + else: + scores = task.run(modality.data) + self.optimization_results[modality.modality_id][task.model.name].add_result( + scores, representation_names, params + ) + + def extract_k_best_modalities_per_task(self): + self.k_best_modalities = {} + for task in self.tasks: + self.k_best_modalities[task] = {} + for modality in self.modalities: + self.k_best_modalities[task][modality.modality_id] = ( + self.unimodal_optimizer.get_k_best_results(modality, self.k, task) + ) + + +class MultimodalResults: + def __init__(self, modality, task): + self.modality_id = modality.modality_id + self.task = task + + self.results = [] + + +@dataclasses.dataclass +class MultimodalResultEntry: + val_score: float + modality_ids: list + representations: list + fusion_method: str + representation_params: list + train_score: float + fusion_params: list diff --git a/src/main/python/systemds/scuro/drsearch/operator_registry.py b/src/main/python/systemds/scuro/drsearch/operator_registry.py index cfd313eb563..3909b51ff98 100644 --- a/src/main/python/systemds/scuro/drsearch/operator_registry.py +++ b/src/main/python/systemds/scuro/drsearch/operator_registry.py @@ -64,6 +64,18 @@ def get_context_operators(self): def get_fusion_operators(self): return self._fusion_operators + def get_representation_by_name(self, representation_name, modality_type): + for representation in self._context_operators: + if representation.__name__ == representation_name: + return representation, True + + if modality_type is not None: + for representation in self._representations[modality_type]: + if representation.__name__ == representation_name: + return representation, False + + return None, False + def register_representation(modalities: Union[ModalityType, List[ModalityType]]): """ diff --git a/src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py 
b/src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py index 24839088df8..99093a8faa6 100644 --- a/src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py +++ b/src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py @@ -18,7 +18,13 @@ # under the License. # # ------------------------------------------------------------- -from build.lib.systemds.scuro.representations.aggregated_representation import AggregatedRepresentation +from dataclasses import dataclass + +from systemds.scuro.representations.window_aggregation import WindowAggregation + +from build.lib.systemds.scuro.representations.aggregated_representation import ( + AggregatedRepresentation, +) from systemds.scuro import ModalityType, Aggregation from systemds.scuro.drsearch.operator_registry import Registry from systemds.scuro.utils.schema_helpers import get_shape @@ -28,92 +34,130 @@ class UnimodalOptimizer: def __init__(self, modalities, tasks): self.modalities = modalities self.tasks = tasks - + self.operator_registry = Registry() self.operator_performance = {} for modality in self.modalities: self.operator_performance[modality.modality_id] = {} for task in tasks: - self.operator_performance[modality.modality_id][task.model.name] = UnimodalResults(modality.modality_id, task.name) - - + self.operator_performance[modality.modality_id][task.model.name] = ( + UnimodalResults(modality.modality_id, task.name) + ) + def get_k_best_results(self, modality, k, task): """ Get the k best results for the given modality :param modality: modality to get the best results for :param k: number of best results """ - - results = self.operator_performance[modality.modality_id][task.model.name].get_k_best_results(k) + + results = sorted( + self.operator_performance[modality.modality_id][task.model.name].results, + key=lambda x: x.val_score, + reverse=True, + )[:k] return results - + def optimize(self): for modality in self.modalities: context_operators = self.operator_registry.get_context_operators() - + 
for context_operator in context_operators: context_representation = None if modality.modality_type != ModalityType.TEXT: con_op = context_operator() context_representation = modality.context(con_op) - self.evaluate(context_representation, [context_operator.__name__], [con_op.parameters]) - - modality_specific_operators = self.operator_registry.get_representations(modality.modality_type) + self.evaluate(context_representation, [con_op]) + + modality_specific_operators = ( + self.operator_registry.get_representations(modality.modality_type) + ) for modality_specific_operator in modality_specific_operators: mod_context = None mod_op = modality_specific_operator() if context_representation is not None: - mod_context = context_representation.apply_representation(mod_op) - self.evaluate(mod_context, [context_operator.__name__, modality_specific_operator.__name__], [con_op.parameters, mod_op.parameters]) - - + mod_context = context_representation.apply_representation( + mod_op + ) + self.evaluate(mod_context, [con_op, mod_op]) + mod = modality.apply_representation(mod_op) - self.evaluate(mod, [modality_specific_operator.__name__], - [mod_op.parameters]) - + self.evaluate(mod, [mod_op]) + for context_operator_after in context_operators: con_op_after = context_operator_after() if mod_context is not None: mod_context = mod_context.context(con_op_after) - self.evaluate(mod_context, - [context_operator.__name__, modality_specific_operator.__name__, context_operator_after.__name__], - [con_op.parameters, mod_op.parameters, con_op_after.parameters]) - + self.evaluate(mod_context, [con_op, mod_op, con_op_after]) + mod = mod.context(con_op_after) - self.evaluate(mod, [modality_specific_operator.__name__, context_operator_after.__name__], - [mod_op.parameters, con_op_after.parameters]) - - def evaluate(self, modality, representation_names, params): + self.evaluate(mod, [mod_op, con_op_after]) + + def evaluate(self, modality, representations): for task in self.tasks: if 
task.expected_dim == 1 and get_shape(modality.metadata) > 1: for aggregation in Aggregation().get_aggregation_functions(): # padding should not be necessary here - agg_operator = AggregatedRepresentation(Aggregation(aggregation, False)) + agg_operator = AggregatedRepresentation( + Aggregation(aggregation, False) + ) agg_modality = agg_operator.transform(modality) - + scores = task.run(agg_modality.data) - rep_names = representation_names.copy() - rep_names.append(agg_operator.name) - - rep_params = params.copy() - rep_params.append(agg_operator.parameters) - self.operator_performance[modality.modality_id][task.model.name].add_result(scores, rep_names, rep_params) + reps = representations.copy() + reps.append(agg_operator) + + self.operator_performance[modality.modality_id][ + task.model.name + ].add_result(scores, reps) else: scores = task.run(modality.data) - self.operator_performance[modality.modality_id][task.model.name].add_result(scores, representation_names, params) - - + self.operator_performance[modality.modality_id][ + task.model.name + ].add_result(scores, representations) + + class UnimodalResults: def __init__(self, modality_id, task_name): self.modality_id = modality_id self.task_name = task_name - self.results = {'representations': [], 'params': [], 'train_score': [], 'val_score':[]} - - def add_result(self, scores, representations, params): - self.results['representations'].append(representations) - self.results['params'].append([param.copy() if param is not None else param for param in params ]) - self.results['train_score'].append(scores[0]) - self.results['val_score'].append(scores[1]) - \ No newline at end of file + self.results = [] + + def add_result(self, scores, representations): + parameters = [] + representation_names = [] + + for rep in representations: + representation_names.append(type(rep).__name__) + if isinstance(rep, AggregatedRepresentation): + parameters.append(rep.parameters) + continue + + params = {} + for param in 
rep.parameters.keys(): + params[param] = getattr(rep, param) + + if isinstance(rep, WindowAggregation): + params["aggregation_function"] = ( + rep.aggregation_function.aggregation_function_name + ) + + parameters.append(params) + + entry = ResultEntry( + representations=representation_names, + params=parameters, + train_score=scores[0], + val_score=scores[1], + ) + self.results.append(entry) + + +@dataclass +class ResultEntry: + val_score: float + representations: list + params: list + train_score: float From 940b232fcefc55c2ab255ad0dec2e7b814ec16cd Mon Sep 17 00:00:00 2001 From: Christina Dionysio Date: Tue, 29 Jul 2025 11:22:44 +0200 Subject: [PATCH 04/19] improve optimization results --- .../scuro/drsearch/multimodal_optimizer.py | 110 +++++++++++++----- .../scuro/drsearch/unimodal_optimizer.py | 49 +++++--- 2 files changed, 108 insertions(+), 51 deletions(-) diff --git a/src/main/python/systemds/scuro/drsearch/multimodal_optimizer.py b/src/main/python/systemds/scuro/drsearch/multimodal_optimizer.py index 6f064f896c5..74901f6204b 100644 --- a/src/main/python/systemds/scuro/drsearch/multimodal_optimizer.py +++ b/src/main/python/systemds/scuro/drsearch/multimodal_optimizer.py @@ -21,38 +21,73 @@ def __init__(self, modalities, unimodal_optimizer, tasks, k=2): self.operator_registry = Registry() self.optimization_results = {} + for modality in self.modalities: + self.optimization_results[modality.modality_id] = {} + for task in tasks: + self.optimization_results[modality.modality_id][task.model.name] = ( + MultimodalResults(modality, task.name) + ) + def optimize(self): for task in self.tasks: for modality in self.modalities: representations = self.k_best_modalities[task][modality.modality_id] - applied_representations = [] - for i in range(0, len(representations)): - applied_representation = modality - for j, rep in enumerate(representations[i].representations): - representation, is_context = ( - self.operator_registry.get_representation_by_name( - rep, 
modality.modality_type - ) + applied_representations = self.extract_representations( + representations, modality + ) + combined_representations = [] + for i in range(1, len(applied_representations)): + for fusion_method in self.operator_registry.get_fusion_operators(): + combined = applied_representations[i - 1].combine( + applied_representations[i], fusion_method() + ) + self.evaluate( + task, + combined, + [i - 1, i], + fusion_method, + [modality.modality_id], ) - if representation is None: - if rep == AggregatedRepresentation.__name__: - representation = AggregatedRepresentation(Aggregation()) - else: - representation = representation() - representation.set_parameters(representations[i].params[j]) - if is_context: - applied_representation = applied_representation.context( - representation + if not fusion_method().commutative: + combined_comm = applied_representations[i].combine( + applied_representations[i - 1], fusion_method() ) - else: - applied_representation = ( - applied_representation.apply_representation( - representation - ) + self.evaluate( + task, + combined_comm, + [i, i - 1], + fusion_method, + [modality.modality_id], ) - applied_representations.append(applied_representation) - def evaluate(self, task, modality, representation_names, params): + def extract_representations(self, representations, modality): + applied_representations = [] + for i in range(0, len(representations)): + applied_representation = modality + for j, rep in enumerate(representations[i].representations): + representation, is_context = ( + self.operator_registry.get_representation_by_name( + rep, modality.modality_type + ) + ) + if representation is None: + if rep == AggregatedRepresentation.__name__: + representation = AggregatedRepresentation(Aggregation()) + else: + representation = representation() + representation.set_parameters(representations[i].params[j]) + if is_context: + applied_representation = applied_representation.context( + representation + ) + else: + 
applied_representation = ( + applied_representation.apply_representation(representation) + ) + applied_representations.append(applied_representation) + return applied_representations + + def evaluate(self, task, modality, representations, fusion, modality_ids): if task.expected_dim == 1 and get_shape(modality.metadata) > 1: for aggregation in Aggregation().get_aggregation_functions(): # padding should not be necessary here @@ -60,18 +95,16 @@ def evaluate(self, task, modality, representation_names, params): agg_modality = agg_operator.transform(modality) scores = task.run(agg_modality.data) - rep_names = representation_names.copy() - rep_names.append(agg_operator.name) + reps = representations.copy() + reps.append(agg_operator) - rep_params = params.copy() - rep_params.append(agg_operator.parameters) self.optimization_results[modality.modality_id][ task.model.name - ].add_result(scores, rep_names, rep_params) + ].add_result(scores, reps, fusion, modality_ids, task) else: scores = task.run(modality.data) self.optimization_results[modality.modality_id][task.model.name].add_result( - scores, representation_names, params + scores, representations, fusion, modality_ids, task ) def extract_k_best_modalities_per_task(self): @@ -91,6 +124,20 @@ def __init__(self, modality, task): self.results = [] + def add_result( + self, scores, best_representation_idx, fusion_method, modality_ids, task + ): + + entry = MultimodalResultEntry( + representations=best_representation_idx, + train_score=scores[0], + val_score=scores[1], + fusion_method=fusion_method.__name__, + modality_ids=modality_ids, + task=task, + ) + self.results.append(entry) + @dataclasses.dataclass class MultimodalResultEntry: @@ -98,6 +145,5 @@ class MultimodalResultEntry: modality_ids: list representations: list fusion_method: str - representation_params: list train_score: float - fusion_params: list + task: str diff --git a/src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py 
b/src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py index 99093a8faa6..3f6671595e6 100644 --- a/src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py +++ b/src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py @@ -36,14 +36,14 @@ def __init__(self, modalities, tasks): self.tasks = tasks self.operator_registry = Registry() - self.operator_performance = {} + self.operator_performance = UnimodalResults(modalities, tasks) - for modality in self.modalities: - self.operator_performance[modality.modality_id] = {} - for task in tasks: - self.operator_performance[modality.modality_id][task.model.name] = ( - UnimodalResults(modality.modality_id, task.name) - ) + # for modality in self.modalities: + # self.operator_performance[modality.modality_id] = {} + # for task in tasks: + # self.operator_performance[modality.modality_id][task.model.name] = ( + # UnimodalResults(modality.modality_id, task.name) + # ) def get_k_best_results(self, modality, k, task): """ @@ -109,23 +109,28 @@ def evaluate(self, modality, representations): reps = representations.copy() reps.append(agg_operator) - self.operator_performance[modality.modality_id][ - task.model.name - ].add_result(scores, reps) + self.operator_performance.add_result( + scores, reps, modality.modality_id, task.model.name + ) else: scores = task.run(modality.data) - self.operator_performance[modality.modality_id][ - task.model.name - ].add_result(scores, representations) + self.operator_performance.add_result( + scores, representations, modality.modality_id, task.model.name + ) class UnimodalResults: - def __init__(self, modality_id, task_name): - self.modality_id = modality_id - self.task_name = task_name - self.results = [] + def __init__(self, modalities, tasks): + self.modality_ids = [modality.modality_id for modality in modalities] + self.task_names = [task.model.name for task in tasks] + self.results = {} + + for modality in self.modality_ids: + self.results[modality] = {} + for task_name in 
self.task_names: + self.results[modality][task_name] = [] - def add_result(self, scores, representations): + def add_result(self, scores, representations, modality_id, task_name): parameters = [] representation_names = [] @@ -152,7 +157,13 @@ def add_result(self, scores, representations): train_score=scores[0], val_score=scores[1], ) - self.results.append(entry) + self.results[modality_id][task_name].append(entry) + + def print_results(self): + for modality in self.modality_ids: + for task_name in self.task_names: + for entry in self.results[modality][task_name]: + print(f"{modality}_{task_name}: {entry}") @dataclass From fb553d4d7fbf8d0af2e51b821efa04c4680cd71c Mon Sep 17 00:00:00 2001 From: Christina Dionysio Date: Wed, 30 Jul 2025 09:35:42 +0200 Subject: [PATCH 05/19] add deterministic random seed generator --- .../scuro/drsearch/unimodal_optimizer.py | 71 +++++++++++++------ .../systemds/scuro/modality/modality.py | 14 ++++ .../systemds/scuro/modality/transformed.py | 3 +- .../scuro/modality/unimodal_modality.py | 2 +- .../scuro/representations/aggregate.py | 1 + .../systemds/scuro/utils/static_variables.py | 7 ++ 6 files changed, 76 insertions(+), 22 deletions(-) create mode 100644 src/main/python/systemds/scuro/utils/static_variables.py diff --git a/src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py b/src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py index 3f6671595e6..4d595fac10c 100644 --- a/src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py +++ b/src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py @@ -31,19 +31,20 @@ class UnimodalOptimizer: - def __init__(self, modalities, tasks): + def __init__(self, modalities, tasks, debug=True): self.modalities = modalities self.tasks = tasks self.operator_registry = Registry() - self.operator_performance = UnimodalResults(modalities, tasks) + self.operator_performance = UnimodalResults(modalities, tasks, debug) - # for modality in self.modalities: - # 
self.operator_performance[modality.modality_id] = {} - # for task in tasks: - # self.operator_performance[modality.modality_id][task.model.name] = ( - # UnimodalResults(modality.modality_id, task.name) - # ) + self._tasks_require_same_dims = True + self.expected_dimensions = None + + for i in range(1, len(self.tasks)): + self.expected_dimensions = tasks[i].expected_dim + if tasks[i - 1].expected_dim != tasks[i].expected_dim: + self._tasks_require_same_dims = False def get_k_best_results(self, modality, k, task): """ @@ -53,7 +54,7 @@ def get_k_best_results(self, modality, k, task): """ results = sorted( - self.operator_performance[modality.modality_id][task.model.name].results, + self.operator_performance.results[modality.modality_id][task.model.name], key=lambda x: x.val_score, reverse=True, )[:k] @@ -96,34 +97,61 @@ def optimize(self): self.evaluate(mod, [mod_op, con_op_after]) def evaluate(self, modality, representations): - for task in self.tasks: - if task.expected_dim == 1 and get_shape(modality.metadata) > 1: + if self._tasks_require_same_dims: + if self.expected_dimensions == 1 and get_shape(modality.metadata) > 1: for aggregation in Aggregation().get_aggregation_functions(): - # padding should not be necessary here agg_operator = AggregatedRepresentation( Aggregation(aggregation, False) ) agg_modality = agg_operator.transform(modality) - scores = task.run(agg_modality.data) reps = representations.copy() reps.append(agg_operator) + agg_modality.pad() + for task in self.tasks: + scores = task.run(agg_modality.data) + self.operator_performance.add_result( + scores, reps, modality.modality_id, task.model.name + ) + else: + modality.pad() + for task in self.tasks: + scores = task.run(modality.data) self.operator_performance.add_result( - scores, reps, modality.modality_id, task.model.name + scores, representations, modality.modality_id, task.model.name + ) + else: + for task in self.tasks: + if task.expected_dim == 1 and get_shape(modality.metadata) > 1: + for 
aggregation in Aggregation().get_aggregation_functions(): + agg_operator = AggregatedRepresentation( + Aggregation(aggregation, False) + ) + agg_modality = agg_operator.transform(modality) + + reps = representations.copy() + reps.append(agg_operator) + modality.pad() + scores = task.run(agg_modality.data) + + self.operator_performance.add_result( + scores, reps, modality.modality_id, task.model.name + ) + else: + modality.pad() + scores = task.run(modality.data) + self.operator_performance.add_result( + scores, representations, modality.modality_id, task.model.name ) - else: - scores = task.run(modality.data) - self.operator_performance.add_result( - scores, representations, modality.modality_id, task.model.name - ) class UnimodalResults: - def __init__(self, modalities, tasks): + def __init__(self, modalities, tasks, debug=False): self.modality_ids = [modality.modality_id for modality in modalities] self.task_names = [task.model.name for task in tasks] self.results = {} + self.debug = debug for modality in self.modality_ids: self.results[modality] = {} @@ -159,6 +187,9 @@ def add_result(self, scores, representations, modality_id, task_name): ) self.results[modality_id][task_name].append(entry) + if self.debug: + print(f"{modality_id}_{task_name}: {entry}") + def print_results(self): for modality in self.modality_ids: for task_name in self.task_names: diff --git a/src/main/python/systemds/scuro/modality/modality.py b/src/main/python/systemds/scuro/modality/modality.py index 87d5b5ee4e4..3b1076b3252 100644 --- a/src/main/python/systemds/scuro/modality/modality.py +++ b/src/main/python/systemds/scuro/modality/modality.py @@ -120,6 +120,20 @@ def flatten(self, padding=True): self.data = np.array(self.data) return self + def pad(self, value=0): + try: + result = np.array(self.data) + except: + maxlen = max([len(seq) for seq in self.data]) + + result = np.full((len(self.data), maxlen), value, dtype=self.data_type) + + for i, seq in enumerate(self.data): + data = 
seq[:maxlen] + result[i, : len(data)] = data + + self.data = result + def get_data_layout(self): if self.has_metadata(): return list(self.metadata.values())[0]["data_layout"]["representation"] diff --git a/src/main/python/systemds/scuro/modality/transformed.py b/src/main/python/systemds/scuro/modality/transformed.py index 362764d21e9..1a292b495b0 100644 --- a/src/main/python/systemds/scuro/modality/transformed.py +++ b/src/main/python/systemds/scuro/modality/transformed.py @@ -20,6 +20,7 @@ # ------------------------------------------------------------- from functools import reduce from operator import or_ +from typing import Union, List from systemds.scuro.modality.type import ModalityType from systemds.scuro.modality.joined import JoinedModality @@ -87,7 +88,7 @@ def apply_representation(self, representation): new_modality.update_metadata() return new_modality - def combine(self, other, fusion_method): + def combine(self, other: Union[Modality, List[Modality]], fusion_method): """ Combines two or more modalities with each other using a dedicated fusion method :param other: The modality to be combined diff --git a/src/main/python/systemds/scuro/modality/unimodal_modality.py b/src/main/python/systemds/scuro/modality/unimodal_modality.py index c0ee70557c5..fb117aa32e8 100644 --- a/src/main/python/systemds/scuro/modality/unimodal_modality.py +++ b/src/main/python/systemds/scuro/modality/unimodal_modality.py @@ -117,7 +117,7 @@ def apply_representation(self, representation): new_modality.data.extend(transformed_chunk.data) new_modality.metadata.update(transformed_chunk.metadata) else: - if not self.data: + if not self.has_data(): self.extract_raw_data() new_modality = representation.transform(self) diff --git a/src/main/python/systemds/scuro/representations/aggregate.py b/src/main/python/systemds/scuro/representations/aggregate.py index 756e6271ea5..506d16f8d08 100644 --- a/src/main/python/systemds/scuro/representations/aggregate.py +++ 
b/src/main/python/systemds/scuro/representations/aggregate.py @@ -58,6 +58,7 @@ def __init__(self, aggregation_function="mean", pad_modality=False, params=None) self._aggregation_func = self._aggregation_function[aggregation_function] self.name = "Aggregation" self.pad_modality = pad_modality + self.aggregation_function_name = aggregation_function self.parameters = { "aggregation_function": aggregation_function, diff --git a/src/main/python/systemds/scuro/utils/static_variables.py b/src/main/python/systemds/scuro/utils/static_variables.py new file mode 100644 index 00000000000..b1733387160 --- /dev/null +++ b/src/main/python/systemds/scuro/utils/static_variables.py @@ -0,0 +1,7 @@ +import numpy as np + +global_rng = np.random.default_rng(42) + + +def get_seed(): + return global_rng.integers(0, 1024) From 098eb2857c34d688a861f8695d5674b85914ac25 Mon Sep 17 00:00:00 2001 From: Christina Dionysio Date: Wed, 30 Jul 2025 09:46:37 +0200 Subject: [PATCH 06/19] aggeregation function as hyperparameter --- .../scuro/drsearch/unimodal_optimizer.py | 50 +++++++++---------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py b/src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py index 4d595fac10c..8f450005b9c 100644 --- a/src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py +++ b/src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py @@ -99,21 +99,21 @@ def optimize(self): def evaluate(self, modality, representations): if self._tasks_require_same_dims: if self.expected_dimensions == 1 and get_shape(modality.metadata) > 1: - for aggregation in Aggregation().get_aggregation_functions(): - agg_operator = AggregatedRepresentation( - Aggregation(aggregation, False) - ) - agg_modality = agg_operator.transform(modality) + # for aggregation in Aggregation().get_aggregation_functions(): + agg_operator = AggregatedRepresentation( + Aggregation() + ) + agg_modality = 
agg_operator.transform(modality) - reps = representations.copy() - reps.append(agg_operator) - agg_modality.pad() - for task in self.tasks: - scores = task.run(agg_modality.data) + reps = representations.copy() + reps.append(agg_operator) + agg_modality.pad() + for task in self.tasks: + scores = task.run(agg_modality.data) - self.operator_performance.add_result( - scores, reps, modality.modality_id, task.model.name - ) + self.operator_performance.add_result( + scores, reps, modality.modality_id, task.model.name + ) else: modality.pad() for task in self.tasks: @@ -124,20 +124,20 @@ def evaluate(self, modality, representations): else: for task in self.tasks: if task.expected_dim == 1 and get_shape(modality.metadata) > 1: - for aggregation in Aggregation().get_aggregation_functions(): - agg_operator = AggregatedRepresentation( - Aggregation(aggregation, False) - ) - agg_modality = agg_operator.transform(modality) + # for aggregation in Aggregation().get_aggregation_functions(): + agg_operator = AggregatedRepresentation( + Aggregation() + ) + agg_modality = agg_operator.transform(modality) - reps = representations.copy() - reps.append(agg_operator) - modality.pad() - scores = task.run(agg_modality.data) + reps = representations.copy() + reps.append(agg_operator) + modality.pad() + scores = task.run(agg_modality.data) - self.operator_performance.add_result( - scores, reps, modality.modality_id, task.model.name - ) + self.operator_performance.add_result( + scores, reps, modality.modality_id, task.model.name + ) else: modality.pad() scores = task.run(modality.data) From 28a001127c327422b28d83de316611c8dbd6d624 Mon Sep 17 00:00:00 2001 From: Christina Dionysio Date: Wed, 30 Jul 2025 14:55:19 +0200 Subject: [PATCH 07/19] parallelize unimodal optimization --- .../scuro/drsearch/multimodal_optimizer.py | 11 ++ .../scuro/drsearch/unimodal_optimizer.py | 127 ++++++++++++------ .../systemds/scuro/modality/modality.py | 12 ++ .../python/systemds/scuro/modality/type.py | 16 ++- 
.../scuro/representations/aggregate.py | 4 +- .../scuro/representations/representation.py | 2 +- .../representations/window_aggregation.py | 2 +- 7 files changed, 121 insertions(+), 53 deletions(-) diff --git a/src/main/python/systemds/scuro/drsearch/multimodal_optimizer.py b/src/main/python/systemds/scuro/drsearch/multimodal_optimizer.py index 74901f6204b..8dd2273caca 100644 --- a/src/main/python/systemds/scuro/drsearch/multimodal_optimizer.py +++ b/src/main/python/systemds/scuro/drsearch/multimodal_optimizer.py @@ -20,6 +20,7 @@ def __init__(self, modalities, unimodal_optimizer, tasks, k=2): self.extract_k_best_modalities_per_task() self.operator_registry = Registry() self.optimization_results = {} + self.cache = {} for modality in self.modalities: self.optimization_results[modality.modality_id] = {} @@ -38,6 +39,13 @@ def optimize(self): combined_representations = [] for i in range(1, len(applied_representations)): for fusion_method in self.operator_registry.get_fusion_operators(): + if ( + fusion_method().needs_alignment + and not applied_representations[i - 1].is_aligned( + applied_representations[i] + ) + ): + continue combined = applied_representations[i - 1].combine( applied_representations[i], fusion_method() ) @@ -107,6 +115,9 @@ def evaluate(self, task, modality, representations, fusion, modality_ids): scores, representations, fusion, modality_ids, task ) + def add_to_cache(self, result_idx, combined_modality): + self.cache[result_idx] = combined_modality + def extract_k_best_modalities_per_task(self): self.k_best_modalities = {} for task in self.tasks: diff --git a/src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py b/src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py index 8f450005b9c..589b1932913 100644 --- a/src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py +++ b/src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py @@ -18,8 +18,11 @@ # under the License. 
# # ------------------------------------------------------------- +import pickle +from concurrent.futures import ProcessPoolExecutor, as_completed from dataclasses import dataclass +import multiprocessing as mp from systemds.scuro.representations.window_aggregation import WindowAggregation from build.lib.systemds.scuro.representations.aggregated_representation import ( @@ -39,9 +42,9 @@ def __init__(self, modalities, tasks, debug=True): self.operator_performance = UnimodalResults(modalities, tasks, debug) self._tasks_require_same_dims = True - self.expected_dimensions = None + self.expected_dimensions = tasks[0].expected_dim - for i in range(1, len(self.tasks)): + for i in range(1, len(tasks)): self.expected_dimensions = tasks[i].expected_dim if tasks[i - 1].expected_dim != tasks[i].expected_dim: self._tasks_require_same_dims = False @@ -61,73 +64,111 @@ def get_k_best_results(self, modality, k, task): return results - def optimize(self): + def optimize_parallel(self, n_workers=None): + if n_workers is None: + n_workers = min(len(self.modalities), mp.cpu_count()) + + with ProcessPoolExecutor(max_workers=n_workers) as executor: + future_to_modality = { + executor.submit(self._process_modality, modality): modality + for modality in self.modalities + } + + for future in as_completed(future_to_modality): + modality = future_to_modality[future] + # try: + results = future.result() + self._merge_results(results) + # except Exception as exc: + # print(f'Modality {modality.modality_id} generated an exception: {exc}') + + def optimize(self, n_workers=None): for modality in self.modalities: - context_operators = self.operator_registry.get_context_operators() + self._process_modality(modality) - for context_operator in context_operators: - context_representation = None - if modality.modality_type != ModalityType.TEXT: - con_op = context_operator() - context_representation = modality.context(con_op) - self.evaluate(context_representation, [con_op]) - - 
modality_specific_operators = ( - self.operator_registry.get_representations(modality.modality_type) - ) - for modality_specific_operator in modality_specific_operators: - mod_context = None - mod_op = modality_specific_operator() - if context_representation is not None: - mod_context = context_representation.apply_representation( - mod_op + def _process_modality(self, modality): + local_results = UnimodalResults( + modalities=[modality], tasks=self.tasks, debug=False + ) + context_operators = self.operator_registry.get_context_operators() + + for context_operator in context_operators: + context_representation = None + if modality.modality_type != ModalityType.TEXT: + con_op = context_operator() + print("context_operator ", con_op.name) + context_representation = modality.context(con_op) + self._evaluate_local(context_representation, [con_op], local_results) + + modality_specific_operators = self.operator_registry.get_representations( + modality.modality_type + ) + for modality_specific_operator in modality_specific_operators: + mod_context = None + mod_op = modality_specific_operator() + if context_representation is not None: + print("before context" + mod_op.name) + mod_context = context_representation.apply_representation(mod_op) + print("after context" + mod_op.name) + self._evaluate_local(mod_context, [con_op, mod_op], local_results) + + print("before " + mod_op.name) + mod = modality.apply_representation(mod_op) + print("after " + mod_op.name) + self._evaluate_local(mod, [mod_op], local_results) + + for context_operator_after in context_operators: + con_op_after = context_operator_after() + if mod_context is not None: + mod_context = mod_context.context(con_op_after) + self._evaluate_local( + mod_context, [con_op, mod_op, con_op_after], local_results ) - self.evaluate(mod_context, [con_op, mod_op]) - mod = modality.apply_representation(mod_op) - self.evaluate(mod, [mod_op]) + mod = mod.context(con_op_after) + self._evaluate_local(mod, [mod_op, con_op_after], 
local_results) - for context_operator_after in context_operators: - con_op_after = context_operator_after() - if mod_context is not None: - mod_context = mod_context.context(con_op_after) - self.evaluate(mod_context, [con_op, mod_op, con_op_after]) + return local_results - mod = mod.context(con_op_after) - self.evaluate(mod, [mod_op, con_op_after]) + def _merge_results(self, local_results): + """Merge local results into the main results""" + for modality_id in local_results.results: + for task_name in local_results.results[modality_id]: + self.operator_performance.results[modality_id][task_name].extend( + local_results.results[modality_id][task_name] + ) - def evaluate(self, modality, representations): + def _evaluate_local(self, modality, representations, local_results): if self._tasks_require_same_dims: if self.expected_dimensions == 1 and get_shape(modality.metadata) > 1: + print("aggregate") # for aggregation in Aggregation().get_aggregation_functions(): - agg_operator = AggregatedRepresentation( - Aggregation() - ) + agg_operator = AggregatedRepresentation(Aggregation()) agg_modality = agg_operator.transform(modality) - + print("aggregated") reps = representations.copy() reps.append(agg_operator) agg_modality.pad() for task in self.tasks: scores = task.run(agg_modality.data) - self.operator_performance.add_result( + local_results.add_result( scores, reps, modality.modality_id, task.model.name ) else: + print("padd") modality.pad() + print("done pad") for task in self.tasks: scores = task.run(modality.data) - self.operator_performance.add_result( + local_results.add_result( scores, representations, modality.modality_id, task.model.name ) else: for task in self.tasks: if task.expected_dim == 1 and get_shape(modality.metadata) > 1: # for aggregation in Aggregation().get_aggregation_functions(): - agg_operator = AggregatedRepresentation( - Aggregation() - ) + agg_operator = AggregatedRepresentation(Aggregation()) agg_modality = agg_operator.transform(modality) 
reps = representations.copy() @@ -135,13 +176,13 @@ def evaluate(self, modality, representations): modality.pad() scores = task.run(agg_modality.data) - self.operator_performance.add_result( + local_results.add_result( scores, reps, modality.modality_id, task.model.name ) else: modality.pad() scores = task.run(modality.data) - self.operator_performance.add_result( + local_results.add_result( scores, representations, modality.modality_id, task.model.name ) @@ -169,7 +210,7 @@ def add_result(self, scores, representations, modality_id, task_name): continue params = {} - for param in rep.parameters.keys(): + for param in list(rep.parameters.keys()): params[param] = getattr(rep, param) if isinstance(rep, WindowAggregation): diff --git a/src/main/python/systemds/scuro/modality/modality.py b/src/main/python/systemds/scuro/modality/modality.py index 3b1076b3252..32e68eff086 100644 --- a/src/main/python/systemds/scuro/modality/modality.py +++ b/src/main/python/systemds/scuro/modality/modality.py @@ -145,3 +145,15 @@ def has_data(self): def has_metadata(self): return self.metadata is not None and self.metadata != {} + + def is_aligned(self, other_modality): + aligned = True + for i in range(len(self.data)): + if ( + list(self.metadata.values())[i]["data_layout"]["shape"] + != list(other_modality.metadata.values())[i]["data_layout"]["shape"] + ): + aligned = False + continue + + return aligned diff --git a/src/main/python/systemds/scuro/modality/type.py b/src/main/python/systemds/scuro/modality/type.py index a479e07085d..c0baaf4c6aa 100644 --- a/src/main/python/systemds/scuro/modality/type.py +++ b/src/main/python/systemds/scuro/modality/type.py @@ -99,11 +99,15 @@ def update_base_metadata(cls, md, data, data_is_single_instance=True): dtype = np.nan shape = None if data_layout is DataLayout.SINGLE_LEVEL: - dtype = data.dtype - shape = data.shape - elif data_layout is DataLayout.NESTED_LEVEL: - shape = data[0].shape dtype = data[0].dtype + shape = data[0].shape + elif 
data_layout is DataLayout.NESTED_LEVEL: + if data_is_single_instance: + dtype = data.dtype + shape = data.shape + else: + shape = data[0].shape + dtype = data[0].dtype md["data_layout"].update( {"representation": data_layout, "type": dtype, "shape": shape} @@ -241,9 +245,9 @@ def get_data_layout(cls, data, data_is_single_instance): if data_is_single_instance: if isinstance(data, list): - return DataLayout.NESTED_LEVEL - elif isinstance(data, np.ndarray): return DataLayout.SINGLE_LEVEL + elif isinstance(data, np.ndarray): + return DataLayout.NESTED_LEVEL if isinstance(data[0], list): return DataLayout.NESTED_LEVEL diff --git a/src/main/python/systemds/scuro/representations/aggregate.py b/src/main/python/systemds/scuro/representations/aggregate.py index 506d16f8d08..1e73c81696d 100644 --- a/src/main/python/systemds/scuro/representations/aggregate.py +++ b/src/main/python/systemds/scuro/representations/aggregate.py @@ -52,7 +52,7 @@ def __init__(self, aggregation_function="mean", pad_modality=False, params=None) aggregation_function = params["aggregation_function"] pad_modality = params["pad_modality"] - if aggregation_function not in self._aggregation_function.keys(): + if aggregation_function not in list(self._aggregation_function.keys()): raise ValueError("Invalid aggregation function") self._aggregation_func = self._aggregation_function[aggregation_function] @@ -101,4 +101,4 @@ def aggregate_instance(self, instance): return self._aggregation_func(instance) def get_aggregation_functions(self): - return self._aggregation_function.keys() + return list(self._aggregation_function.keys()) diff --git a/src/main/python/systemds/scuro/representations/representation.py b/src/main/python/systemds/scuro/representations/representation.py index a9f283b6fe3..6137baf46dc 100644 --- a/src/main/python/systemds/scuro/representations/representation.py +++ b/src/main/python/systemds/scuro/representations/representation.py @@ -32,7 +32,7 @@ def parameters(self): def 
get_current_parameters(self): current_params = {} - for parameter in self.parameters.keys(): + for parameter in list(self.parameters.keys()): current_params[parameter] = getattr(self, parameter) return current_params diff --git a/src/main/python/systemds/scuro/representations/window_aggregation.py b/src/main/python/systemds/scuro/representations/window_aggregation.py index bff63729c7b..773399eecdf 100644 --- a/src/main/python/systemds/scuro/representations/window_aggregation.py +++ b/src/main/python/systemds/scuro/representations/window_aggregation.py @@ -62,7 +62,7 @@ def execute(self, modality): windowed_data.append(windowed_instance) - return windowed_data + return np.array(windowed_data) def window_aggregate_single_level(self, instance, new_length): if isinstance(instance, str): From f2760f13968455e95941d4d68ad583f8bf518f89 Mon Sep 17 00:00:00 2001 From: Christina Dionysio Date: Wed, 30 Jul 2025 15:55:17 +0200 Subject: [PATCH 08/19] print multimodal optimizations --- .../scuro/drsearch/multimodal_optimizer.py | 108 +++++++++++++----- .../scuro/drsearch/unimodal_optimizer.py | 9 -- 2 files changed, 79 insertions(+), 38 deletions(-) diff --git a/src/main/python/systemds/scuro/drsearch/multimodal_optimizer.py b/src/main/python/systemds/scuro/drsearch/multimodal_optimizer.py index 8dd2273caca..4a5a2ec79e5 100644 --- a/src/main/python/systemds/scuro/drsearch/multimodal_optimizer.py +++ b/src/main/python/systemds/scuro/drsearch/multimodal_optimizer.py @@ -11,7 +11,7 @@ class MultimodalOptimizer: - def __init__(self, modalities, unimodal_optimizer, tasks, k=2): + def __init__(self, modalities, unimodal_optimizer, tasks, k=2, debug=True): self.k_best_modalities = None self.modalities = modalities self.unimodal_optimizer = unimodal_optimizer @@ -19,20 +19,17 @@ def __init__(self, modalities, unimodal_optimizer, tasks, k=2): self.k = k self.extract_k_best_modalities_per_task() self.operator_registry = Registry() - self.optimization_results = {} + 
self.optimization_results = MultimodalResults( + modalities, tasks, debug, self.k_best_modalities + ) self.cache = {} - for modality in self.modalities: - self.optimization_results[modality.modality_id] = {} - for task in tasks: - self.optimization_results[modality.modality_id][task.model.name] = ( - MultimodalResults(modality, task.name) - ) - def optimize(self): for task in self.tasks: for modality in self.modalities: - representations = self.k_best_modalities[task][modality.modality_id] + representations = self.k_best_modalities[task.model.name][ + modality.modality_id + ] applied_representations = self.extract_representations( representations, modality ) @@ -54,7 +51,10 @@ def optimize(self): combined, [i - 1, i], fusion_method, - [modality.modality_id], + [ + applied_representations[i - 1].modality_id, + applied_representations[i].modality_id, + ], ) if not fusion_method().commutative: combined_comm = applied_representations[i].combine( @@ -65,7 +65,10 @@ def optimize(self): combined_comm, [i, i - 1], fusion_method, - [modality.modality_id], + [ + applied_representations[i - 1].modality_id, + applied_representations[i].modality_id, + ], ) def extract_representations(self, representations, modality): @@ -106,13 +109,13 @@ def evaluate(self, task, modality, representations, fusion, modality_ids): reps = representations.copy() reps.append(agg_operator) - self.optimization_results[modality.modality_id][ - task.model.name - ].add_result(scores, reps, fusion, modality_ids, task) + self.optimization_results.add_result( + scores, reps, [fusion], modality_ids, task.model.name + ) else: scores = task.run(modality.data) - self.optimization_results[modality.modality_id][task.model.name].add_result( - scores, representations, fusion, modality_ids, task + self.optimization_results.add_result( + scores, representations, [fusion], modality_ids, task.model.name ) def add_to_cache(self, result_idx, combined_modality): @@ -121,33 +124,80 @@ def add_to_cache(self, result_idx, 
combined_modality): def extract_k_best_modalities_per_task(self): self.k_best_modalities = {} for task in self.tasks: - self.k_best_modalities[task] = {} + self.k_best_modalities[task.model.name] = {} for modality in self.modalities: - self.k_best_modalities[task][modality.modality_id] = ( + self.k_best_modalities[task.model.name][modality.modality_id] = ( self.unimodal_optimizer.get_k_best_results(modality, self.k, task) ) class MultimodalResults: - def __init__(self, modality, task): - self.modality_id = modality.modality_id - self.task = task - - self.results = [] + def __init__(self, modalities, tasks, debug, k_best_modalities): + self.modality_ids = [modality.modality_id for modality in modalities] + self.task_names = [task.model.name for task in tasks] + self.results = {} + self.debug = debug + self.k_best_modalities = k_best_modalities def add_result( - self, scores, best_representation_idx, fusion_method, modality_ids, task + self, scores, best_representation_idx, fusion_methods, modality_ids, task_name ): entry = MultimodalResultEntry( representations=best_representation_idx, train_score=scores[0], val_score=scores[1], - fusion_method=fusion_method.__name__, + fusion_methods=[fusion_method.__name__ for fusion_method in fusion_methods], modality_ids=modality_ids, - task=task, + task=task_name, ) - self.results.append(entry) + + modality_id_strings = "_".join(list(map(str, modality_ids))) + if not modality_id_strings in self.results: + self.results[modality_id_strings] = {} + self.results[modality_id_strings][task_name] = [] + + self.results[modality_id_strings][task_name].append(entry) + + def print_results(self): + for modality in self.results.keys(): + for task_name in self.task_names: + for entry in self.results[modality][task_name]: + reps = [] + for i, mod_idx in enumerate(entry.modality_ids): + reps.append( + self.k_best_modalities[task_name][mod_idx][ + entry.representations[i] + ] + ) + + print( + f"{modality}_{task_name}: " + f"Validation score: 
{entry.val_score} - Training score: {entry.train_score}" + ) + for i, rep in enumerate(reps): + print( + f" Representation: {entry.modality_ids[i]} - {rep.representations}" + ) + if i < len(reps) - 1: + print(f" Fusion: {entry.fusion_methods[i]} ") + + def store_results(self): + for modality in self.results.keys(): + for task_name in self.task_names: + for entry in self.results[modality][task_name]: + reps = [] + for i, mod_idx in enumerate(entry.modality_ids): + reps.append( + self.k_best_modalities[task_name][mod_idx][ + entry.representations[i] + ] + ) + entry.representations = reps + + import pickle + + pickle.dump(self.results, open("multimodal_results.p", "wb")) @dataclasses.dataclass @@ -155,6 +205,6 @@ class MultimodalResultEntry: val_score: float modality_ids: list representations: list - fusion_method: str + fusion_methods: list train_score: float task: str diff --git a/src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py b/src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py index 589b1932913..e27b805ae54 100644 --- a/src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py +++ b/src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py @@ -96,7 +96,6 @@ def _process_modality(self, modality): context_representation = None if modality.modality_type != ModalityType.TEXT: con_op = context_operator() - print("context_operator ", con_op.name) context_representation = modality.context(con_op) self._evaluate_local(context_representation, [con_op], local_results) @@ -107,14 +106,10 @@ def _process_modality(self, modality): mod_context = None mod_op = modality_specific_operator() if context_representation is not None: - print("before context" + mod_op.name) mod_context = context_representation.apply_representation(mod_op) - print("after context" + mod_op.name) self._evaluate_local(mod_context, [con_op, mod_op], local_results) - print("before " + mod_op.name) mod = modality.apply_representation(mod_op) - print("after " + mod_op.name) 
self._evaluate_local(mod, [mod_op], local_results) for context_operator_after in context_operators: @@ -141,11 +136,9 @@ def _merge_results(self, local_results): def _evaluate_local(self, modality, representations, local_results): if self._tasks_require_same_dims: if self.expected_dimensions == 1 and get_shape(modality.metadata) > 1: - print("aggregate") # for aggregation in Aggregation().get_aggregation_functions(): agg_operator = AggregatedRepresentation(Aggregation()) agg_modality = agg_operator.transform(modality) - print("aggregated") reps = representations.copy() reps.append(agg_operator) agg_modality.pad() @@ -156,9 +149,7 @@ def _evaluate_local(self, modality, representations, local_results): scores, reps, modality.modality_id, task.model.name ) else: - print("padd") modality.pad() - print("done pad") for task in self.tasks: scores = task.run(modality.data) local_results.add_result( From 0a4f6cade8e8b73ff3fb96aa8f0a1627beccfe24 Mon Sep 17 00:00:00 2001 From: Christina Dionysio Date: Tue, 5 Aug 2025 14:50:02 +0200 Subject: [PATCH 09/19] refine unimodal optimizer --- .../scuro/drsearch/multimodal_optimizer.py | 107 ++++++++++-------- .../scuro/drsearch/unimodal_optimizer.py | 20 +++- .../representations/window_aggregation.py | 4 +- 3 files changed, 79 insertions(+), 52 deletions(-) diff --git a/src/main/python/systemds/scuro/drsearch/multimodal_optimizer.py b/src/main/python/systemds/scuro/drsearch/multimodal_optimizer.py index 4a5a2ec79e5..e6d7abd25c3 100644 --- a/src/main/python/systemds/scuro/drsearch/multimodal_optimizer.py +++ b/src/main/python/systemds/scuro/drsearch/multimodal_optimizer.py @@ -26,50 +26,54 @@ def __init__(self, modalities, unimodal_optimizer, tasks, k=2, debug=True): def optimize(self): for task in self.tasks: - for modality in self.modalities: - representations = self.k_best_modalities[task.model.name][ - modality.modality_id - ] - applied_representations = self.extract_representations( - representations, modality - ) - 
combined_representations = [] - for i in range(1, len(applied_representations)): - for fusion_method in self.operator_registry.get_fusion_operators(): - if ( - fusion_method().needs_alignment - and not applied_representations[i - 1].is_aligned( - applied_representations[i] - ) - ): - continue - combined = applied_representations[i - 1].combine( - applied_representations[i], fusion_method() + self.optimize_intramodal_representations(task) + self.optimize_intermodal_representations(task) + + def optimize_intramodal_representations(self, task): + for modality in self.modalities: + representations = self.k_best_modalities[task.model.name][ + modality.modality_id + ] + applied_representations = self.extract_representations( + representations, modality + ) + + for i in range(1, len(applied_representations)): + for fusion_method in self.operator_registry.get_fusion_operators(): + if fusion_method().needs_alignment and not applied_representations[ + i - 1 + ].is_aligned(applied_representations[i]): + continue + combined = applied_representations[i - 1].combine( + applied_representations[i], fusion_method() + ) + self.evaluate( + task, + combined, + [i - 1, i], + fusion_method, + [ + applied_representations[i - 1].modality_id, + applied_representations[i].modality_id, + ], + ) + if not fusion_method().commutative: + combined_comm = applied_representations[i].combine( + applied_representations[i - 1], fusion_method() ) self.evaluate( task, - combined, - [i - 1, i], + combined_comm, + [i, i - 1], fusion_method, [ applied_representations[i - 1].modality_id, applied_representations[i].modality_id, ], ) - if not fusion_method().commutative: - combined_comm = applied_representations[i].combine( - applied_representations[i - 1], fusion_method() - ) - self.evaluate( - task, - combined_comm, - [i, i - 1], - fusion_method, - [ - applied_representations[i - 1].modality_id, - applied_representations[i].modality_id, - ], - ) + + def optimize_intermodal_representations(self, task): + 
pass def extract_representations(self, representations, modality): applied_representations = [] @@ -139,6 +143,9 @@ def __init__(self, modalities, tasks, debug, k_best_modalities): self.debug = debug self.k_best_modalities = k_best_modalities + for task in tasks: + self.results[task.model.name] = {} + def add_result( self, scores, best_representation_idx, fusion_methods, modality_ids, task_name ): @@ -153,16 +160,15 @@ def add_result( ) modality_id_strings = "_".join(list(map(str, modality_ids))) - if not modality_id_strings in self.results: - self.results[modality_id_strings] = {} - self.results[modality_id_strings][task_name] = [] + if not modality_id_strings in self.results[task_name]: + self.results[task_name][modality_id_strings] = [] - self.results[modality_id_strings][task_name].append(entry) + self.results[task_name][modality_id_strings].append(entry) def print_results(self): - for modality in self.results.keys(): - for task_name in self.task_names: - for entry in self.results[modality][task_name]: + for task_name in self.task_names: + for modality in self.results[task_name].keys(): + for entry in self.results[task_name][modality]: reps = [] for i, mod_idx in enumerate(entry.modality_ids): reps.append( @@ -182,10 +188,10 @@ def print_results(self): if i < len(reps) - 1: print(f" Fusion: {entry.fusion_methods[i]} ") - def store_results(self): - for modality in self.results.keys(): - for task_name in self.task_names: - for entry in self.results[modality][task_name]: + def store_results(self, file_name=None): + for task_name in self.task_names: + for modality in self.results[task_name].keys(): + for entry in self.results[task_name][modality]: reps = [] for i, mod_idx in enumerate(entry.modality_ids): reps.append( @@ -197,7 +203,14 @@ def store_results(self): import pickle - pickle.dump(self.results, open("multimodal_results.p", "wb")) + if file_name is None: + import time + + timestr = time.strftime("%Y%m%d-%H%M%S") + file_name = "multimodal_optimizer" + 
timestr + ".pkl" + + with open(file_name, "wb") as f: + pickle.dump(self.results, f) @dataclasses.dataclass diff --git a/src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py b/src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py index e27b805ae54..54c0ba52aed 100644 --- a/src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py +++ b/src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py @@ -49,6 +49,16 @@ def __init__(self, modalities, tasks, debug=True): if tasks[i - 1].expected_dim != tasks[i].expected_dim: self._tasks_require_same_dims = False + def store_results(self, file_name=None): + if file_name is None: + import time + + timestr = time.strftime("%Y%m%d-%H%M%S") + file_name = "unimodal_optimizer" + timestr + ".pkl" + + with open(file_name, "wb") as f: + pickle.dump(self.operator_performance, f) + def get_k_best_results(self, modality, k, task): """ Get the k best results for the given modality @@ -82,9 +92,10 @@ def optimize_parallel(self, n_workers=None): # except Exception as exc: # print(f'Modality {modality.modality_id} generated an exception: {exc}') - def optimize(self, n_workers=None): + def optimize(self): for modality in self.modalities: - self._process_modality(modality) + local_result = self._process_modality(modality) + self._merge_results(local_result) def _process_modality(self, modality): local_results = UnimodalResults( @@ -94,7 +105,10 @@ def _process_modality(self, modality): for context_operator in context_operators: context_representation = None - if modality.modality_type != ModalityType.TEXT: + if ( + modality.modality_type != ModalityType.TEXT + and modality.modality_type != ModalityType.VIDEO + ): con_op = context_operator() context_representation = modality.context(con_op) self._evaluate_local(context_representation, [con_op], local_results) diff --git a/src/main/python/systemds/scuro/representations/window_aggregation.py b/src/main/python/systemds/scuro/representations/window_aggregation.py index 
773399eecdf..d17c703721b 100644 --- a/src/main/python/systemds/scuro/representations/window_aggregation.py +++ b/src/main/python/systemds/scuro/representations/window_aggregation.py @@ -62,7 +62,7 @@ def execute(self, modality): windowed_data.append(windowed_instance) - return np.array(windowed_data) + return windowed_data def window_aggregate_single_level(self, instance, new_length): if isinstance(instance, str): @@ -86,4 +86,4 @@ def window_aggregate_nested_level(self, instance, new_length): data[i * self.window_size : i * self.window_size + self.window_size] ) - return result + return np.array(result) From 9abf34e166694eb9ab76293ae0e58eceb18f3efa Mon Sep 17 00:00:00 2001 From: Christina Dionysio Date: Tue, 5 Aug 2025 16:23:53 +0200 Subject: [PATCH 10/19] add timing --- .../scuro/drsearch/unimodal_optimizer.py | 46 ++++++++++++++++--- .../systemds/scuro/modality/modality.py | 1 + .../systemds/scuro/modality/transformed.py | 9 +++- .../scuro/modality/unimodal_modality.py | 7 ++- 4 files changed, 53 insertions(+), 10 deletions(-) diff --git a/src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py b/src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py index 54c0ba52aed..731e42623cd 100644 --- a/src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py +++ b/src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py @@ -19,6 +19,7 @@ # # ------------------------------------------------------------- import pickle +import time from concurrent.futures import ProcessPoolExecutor, as_completed from dataclasses import dataclass @@ -157,17 +158,31 @@ def _evaluate_local(self, modality, representations, local_results): reps.append(agg_operator) agg_modality.pad() for task in self.tasks: + start = time.time() scores = task.run(agg_modality.data) + end = time.time() local_results.add_result( - scores, reps, modality.modality_id, task.model.name + scores, + reps, + modality.modality_id, + task.model.name, + modality.transform_time, + end - start, ) else: 
modality.pad() for task in self.tasks: + start = time.time() scores = task.run(modality.data) + end = time.time() local_results.add_result( - scores, representations, modality.modality_id, task.model.name + scores, + representations, + modality.modality_id, + task.model.name, + modality.transform_time, + end - start, ) else: for task in self.tasks: @@ -179,16 +194,29 @@ def _evaluate_local(self, modality, representations, local_results): reps = representations.copy() reps.append(agg_operator) modality.pad() + start = time.time() scores = task.run(agg_modality.data) - + end = time.time() local_results.add_result( - scores, reps, modality.modality_id, task.model.name + scores, + reps, + modality.modality_id, + task.model.name, + modality.transform_time, + end - start, ) else: modality.pad() + start = time.time() scores = task.run(modality.data) + end = time.time() local_results.add_result( - scores, representations, modality.modality_id, task.model.name + scores, + representations, + modality.modality_id, + task.model.name, + modality.transform_time, + end - start, ) @@ -204,7 +232,9 @@ def __init__(self, modalities, tasks, debug=False): for task_name in self.task_names: self.results[modality][task_name] = [] - def add_result(self, scores, representations, modality_id, task_name): + def add_result( + self, scores, representations, modality_id, task_name, rep_time, task_time + ): parameters = [] representation_names = [] @@ -230,6 +260,8 @@ def add_result(self, scores, representations, modality_id, task_name): params=parameters, train_score=scores[0], val_score=scores[1], + representation_time=rep_time, + task_time=task_time, ) self.results[modality_id][task_name].append(entry) @@ -249,3 +281,5 @@ class ResultEntry: representations: list params: list train_score: float + representation_time: float + task_time: float diff --git a/src/main/python/systemds/scuro/modality/modality.py b/src/main/python/systemds/scuro/modality/modality.py index 32e68eff086..272696940fe 
100644 --- a/src/main/python/systemds/scuro/modality/modality.py +++ b/src/main/python/systemds/scuro/modality/modality.py @@ -44,6 +44,7 @@ def __init__( self.cost = None self.shape = None self.modality_id = modality_id + self.transform_time = None @property def data(self): diff --git a/src/main/python/systemds/scuro/modality/transformed.py b/src/main/python/systemds/scuro/modality/transformed.py index 1a292b495b0..63ca10251c4 100644 --- a/src/main/python/systemds/scuro/modality/transformed.py +++ b/src/main/python/systemds/scuro/modality/transformed.py @@ -26,6 +26,7 @@ from systemds.scuro.modality.joined import JoinedModality from systemds.scuro.modality.modality import Modality from systemds.scuro.representations.window_aggregation import WindowAggregation +import time class TransformedModality(Modality): @@ -73,19 +74,23 @@ def join(self, right, join_condition): def window_aggregation(self, windowSize, aggregation): w = WindowAggregation(windowSize, aggregation) transformed_modality = TransformedModality(self, w) + start = time.time() transformed_modality.data = w.execute(self) - + transformed_modality.transform_time = time.time() - start return transformed_modality def context(self, context_operator): transformed_modality = TransformedModality(self, context_operator) - + start = time.time() transformed_modality.data = context_operator.execute(self) + transformed_modality.transform_time = time.time() - start return transformed_modality def apply_representation(self, representation): + start = time.time() new_modality = representation.transform(self) new_modality.update_metadata() + new_modality.transform_time = time.time() - start return new_modality def combine(self, other: Union[Modality, List[Modality]], fusion_method): diff --git a/src/main/python/systemds/scuro/modality/unimodal_modality.py b/src/main/python/systemds/scuro/modality/unimodal_modality.py index fb117aa32e8..8b318cf3a7f 100644 --- a/src/main/python/systemds/scuro/modality/unimodal_modality.py 
+++ b/src/main/python/systemds/scuro/modality/unimodal_modality.py @@ -20,7 +20,7 @@ # ------------------------------------------------------------- from functools import reduce from operator import or_ - +import time from systemds.scuro.dataloader.base_loader import BaseLoader from systemds.scuro.modality.modality import Modality @@ -86,12 +86,14 @@ def join(self, other, join_condition): return joined_modality def context(self, context_operator): + start = time.time() if not self.has_data(): self.extract_raw_data() transformed_modality = TransformedModality(self, context_operator) transformed_modality.data = context_operator.execute(self) + transformed_modality.transform_time = time.time() - start return transformed_modality def aggregate(self, aggregation_function): @@ -108,7 +110,7 @@ def apply_representation(self, representation): representation, ) new_modality.data = [] - + start = time.time() if self.data_loader.chunk_size: self.data_loader.reset() while self.data_loader.next_chunk < self.data_loader.num_chunks: @@ -122,4 +124,5 @@ def apply_representation(self, representation): new_modality = representation.transform(self) new_modality.update_metadata() + new_modality.transform_time = time.time() - start return new_modality From 302501e9e91f8d7ab2027f5a9d5293d7f2addf74 Mon Sep 17 00:00:00 2001 From: Christina Dionysio Date: Wed, 6 Aug 2025 15:51:25 +0200 Subject: [PATCH 11/19] add padding --- .../systemds/scuro/modality/modality.py | 4 ++- .../python/systemds/scuro/modality/type.py | 9 ++++- .../scuro/modality/unimodal_modality.py | 36 ++++++++++++++++++- 3 files changed, 46 insertions(+), 3 deletions(-) diff --git a/src/main/python/systemds/scuro/modality/modality.py b/src/main/python/systemds/scuro/modality/modality.py index 272696940fe..e4240690f18 100644 --- a/src/main/python/systemds/scuro/modality/modality.py +++ b/src/main/python/systemds/scuro/modality/modality.py @@ -62,9 +62,11 @@ def get_modality_names(self) -> List[str]: """ Extracts the 
individual unimodal modalities for a given transformed modality. """ - return [ + modality_names = [ modality.name for modality in ModalityType if modality in self.modality_type ] + modality_names.append(str(self.modality_id)) + return modality_names def copy_from_instance(self): """ diff --git a/src/main/python/systemds/scuro/modality/type.py b/src/main/python/systemds/scuro/modality/type.py index c0baaf4c6aa..e04ef685689 100644 --- a/src/main/python/systemds/scuro/modality/type.py +++ b/src/main/python/systemds/scuro/modality/type.py @@ -26,6 +26,7 @@ calculate_new_frequency, create_timestamps, ) +import torch # TODO: needs a way to define if data comes from a dataset with multiple instances or is like a streaming scenario where we only have one instance @@ -203,6 +204,12 @@ def add_field(self, md, field, data): md[field] = data return md + def add_field_for_instances(self, md, field, data): + for key, value in zip(md.keys(), data): + md[key].update({field: value}) + + return md + def create_audio_metadata(self, sampling_rate, data): md = deepcopy(self.get_schema()) md = ModalitySchemas.update_base_metadata(md, data, True) @@ -246,7 +253,7 @@ def get_data_layout(cls, data, data_is_single_instance): if data_is_single_instance: if isinstance(data, list): return DataLayout.SINGLE_LEVEL - elif isinstance(data, np.ndarray): + elif isinstance(data, np.ndarray) or isinstance(data, torch.Tensor): return DataLayout.NESTED_LEVEL if isinstance(data[0], list): diff --git a/src/main/python/systemds/scuro/modality/unimodal_modality.py b/src/main/python/systemds/scuro/modality/unimodal_modality.py index 8b318cf3a7f..edcab8cec75 100644 --- a/src/main/python/systemds/scuro/modality/unimodal_modality.py +++ b/src/main/python/systemds/scuro/modality/unimodal_modality.py @@ -21,7 +21,8 @@ from functools import reduce from operator import or_ import time - +import numpy as np +from systemds.scuro import ModalityType from systemds.scuro.dataloader.base_loader import BaseLoader from 
systemds.scuro.modality.modality import Modality from systemds.scuro.modality.joined import JoinedModality @@ -113,11 +114,44 @@ def apply_representation(self, representation): start = time.time() if self.data_loader.chunk_size: self.data_loader.reset() + original_lengths = [] while self.data_loader.next_chunk < self.data_loader.num_chunks: self.extract_raw_data() transformed_chunk = representation.transform(self) new_modality.data.extend(transformed_chunk.data) + for d in transformed_chunk.data: + original_lengths.append(d.shape[0]) new_modality.metadata.update(transformed_chunk.metadata) + + target_length = max(original_lengths) + padded_embeddings = [] + for embeddings in new_modality.data: + current_length = embeddings.shape[0] + if current_length < target_length: + padding_needed = target_length - current_length + + padded = np.pad( + embeddings, + pad_width=( + (0, padding_needed), + (0, 0), + ), # (before, after) for each axis + mode="constant", + constant_values=0, + ) + padded_embeddings.append(padded) + else: + padded_embeddings.append(embeddings) + + attention_masks = np.zeros((len(new_modality.data), target_length)) + for i, length in enumerate(original_lengths): + attention_masks[i, :length] = 1 + + ModalityType(self.modality_type).add_field_for_instances( + new_modality.metadata, "attention_masks", attention_masks + ) + new_modality.data = padded_embeddings + else: if not self.has_data(): self.extract_raw_data() From 6d31982caf10898373c6d2f4d6ac3f6584e91da8 Mon Sep 17 00:00:00 2001 From: Christina Dionysio Date: Thu, 7 Aug 2025 11:19:38 +0200 Subject: [PATCH 12/19] adapt padding to window operations --- .../systemds/scuro/drsearch/dr_search.py | 2 +- .../scuro/drsearch/unimodal_optimizer.py | 8 +-- .../scuro/modality/joined_transformed.py | 3 +- .../python/systemds/scuro/modality/type.py | 18 +++++-- .../scuro/modality/unimodal_modality.py | 21 +++++--- .../systemds/scuro/representations/bert.py | 53 ++++++++++--------- 
.../scuro/representations/mel_spectrogram.py | 9 ++-- .../systemds/scuro/representations/mfcc.py | 9 ++-- .../scuro/representations/spectrogram.py | 9 ++-- .../systemds/scuro/representations/tfidf.py | 3 +- .../systemds/scuro/representations/wav2vec.py | 4 +- .../representations/window_aggregation.py | 47 ++++++++++++++-- .../scuro/representations/word2vec.py | 4 +- .../systemds/scuro/utils/static_variables.py | 9 ++++ src/main/python/tests/scuro/data_generator.py | 37 +++++++------ .../tests/scuro/test_multimodal_join.py | 4 -- .../tests/scuro/test_unimodal_optimizer.py | 16 ++---- 17 files changed, 159 insertions(+), 97 deletions(-) diff --git a/src/main/python/systemds/scuro/drsearch/dr_search.py b/src/main/python/systemds/scuro/drsearch/dr_search.py index 2000608a1df..601001c7428 100644 --- a/src/main/python/systemds/scuro/drsearch/dr_search.py +++ b/src/main/python/systemds/scuro/drsearch/dr_search.py @@ -76,7 +76,7 @@ def set_best_params( """ # check if modality name is already in dictionary - if "_".join(modality_names) not in self.scores.keys(): + if "_".join(modality_names) not in list(self.scores.keys()): # if not add it to dictionary self.scores["_".join(modality_names)] = {} diff --git a/src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py b/src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py index 731e42623cd..21dce69840c 100644 --- a/src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py +++ b/src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py @@ -156,7 +156,7 @@ def _evaluate_local(self, modality, representations, local_results): agg_modality = agg_operator.transform(modality) reps = representations.copy() reps.append(agg_operator) - agg_modality.pad() + # agg_modality.pad() for task in self.tasks: start = time.time() scores = task.run(agg_modality.data) @@ -171,7 +171,7 @@ def _evaluate_local(self, modality, representations, local_results): end - start, ) else: - modality.pad() + # modality.pad() for task in 
self.tasks: start = time.time() scores = task.run(modality.data) @@ -193,7 +193,7 @@ def _evaluate_local(self, modality, representations, local_results): reps = representations.copy() reps.append(agg_operator) - modality.pad() + # modality.pad() start = time.time() scores = task.run(agg_modality.data) end = time.time() @@ -206,7 +206,7 @@ def _evaluate_local(self, modality, representations, local_results): end - start, ) else: - modality.pad() + # modality.pad() start = time.time() scores = task.run(modality.data) end = time.time() diff --git a/src/main/python/systemds/scuro/modality/joined_transformed.py b/src/main/python/systemds/scuro/modality/joined_transformed.py index 6c6190e03cc..3e0d8fb9dfb 100644 --- a/src/main/python/systemds/scuro/modality/joined_transformed.py +++ b/src/main/python/systemds/scuro/modality/joined_transformed.py @@ -36,7 +36,8 @@ def __init__(self, left_modality, right_modality, transformation): :param transformation: Representation to be applied on the modality """ super().__init__( - reduce(or_, [left_modality.modality_type], right_modality.modality_type) + reduce(or_, [left_modality.modality_type], right_modality.modality_type), + data_type=left_modality.data_type, ) self.transformation = transformation self.left_modality = left_modality diff --git a/src/main/python/systemds/scuro/modality/type.py b/src/main/python/systemds/scuro/modality/type.py index e04ef685689..b2331d0faed 100644 --- a/src/main/python/systemds/scuro/modality/type.py +++ b/src/main/python/systemds/scuro/modality/type.py @@ -100,8 +100,12 @@ def update_base_metadata(cls, md, data, data_is_single_instance=True): dtype = np.nan shape = None if data_layout is DataLayout.SINGLE_LEVEL: - dtype = data[0].dtype - shape = data[0].shape + if isinstance(data, list): + dtype = data[0].dtype + shape = data[0].shape + elif isinstance(data, np.ndarray): + dtype = data.dtype + shape = data.shape elif data_layout is DataLayout.NESTED_LEVEL: if data_is_single_instance: dtype = 
data.dtype @@ -210,9 +214,9 @@ def add_field_for_instances(self, md, field, data): return md - def create_audio_metadata(self, sampling_rate, data): + def create_audio_metadata(self, sampling_rate, data, is_single_instance=True): md = deepcopy(self.get_schema()) - md = ModalitySchemas.update_base_metadata(md, data, True) + md = ModalitySchemas.update_base_metadata(md, data, is_single_instance) md["frequency"] = sampling_rate md["length"] = data.shape[0] md["timestamp"] = create_timestamps(sampling_rate, md["length"]) @@ -251,7 +255,11 @@ def get_data_layout(cls, data, data_is_single_instance): return None if data_is_single_instance: - if isinstance(data, list): + if ( + isinstance(data, list) + or isinstance(data, np.ndarray) + and data.ndim == 1 + ): return DataLayout.SINGLE_LEVEL elif isinstance(data, np.ndarray) or isinstance(data, torch.Tensor): return DataLayout.NESTED_LEVEL diff --git a/src/main/python/systemds/scuro/modality/unimodal_modality.py b/src/main/python/systemds/scuro/modality/unimodal_modality.py index edcab8cec75..94d1fa057d9 100644 --- a/src/main/python/systemds/scuro/modality/unimodal_modality.py +++ b/src/main/python/systemds/scuro/modality/unimodal_modality.py @@ -112,9 +112,9 @@ def apply_representation(self, representation): ) new_modality.data = [] start = time.time() + original_lengths = [] if self.data_loader.chunk_size: self.data_loader.reset() - original_lengths = [] while self.data_loader.next_chunk < self.data_loader.num_chunks: self.extract_raw_data() transformed_chunk = representation.transform(self) @@ -122,7 +122,18 @@ def apply_representation(self, representation): for d in transformed_chunk.data: original_lengths.append(d.shape[0]) new_modality.metadata.update(transformed_chunk.metadata) + else: + if not self.has_data(): + self.extract_raw_data() + new_modality = representation.transform(self) + + if not all( + "attention_masks" in entry for entry in new_modality.metadata.values() + ): + for d in new_modality.data: + 
original_lengths.append(d.shape[0]) + if len(original_lengths) > 0 and min(original_lengths) < max(original_lengths): target_length = max(original_lengths) padded_embeddings = [] for embeddings in new_modality.data: @@ -135,7 +146,7 @@ def apply_representation(self, representation): pad_width=( (0, padding_needed), (0, 0), - ), # (before, after) for each axis + ), mode="constant", constant_values=0, ) @@ -151,12 +162,6 @@ def apply_representation(self, representation): new_modality.metadata, "attention_masks", attention_masks ) new_modality.data = padded_embeddings - - else: - if not self.has_data(): - self.extract_raw_data() - new_modality = representation.transform(self) - new_modality.update_metadata() new_modality.transform_time = time.time() - start return new_modality diff --git a/src/main/python/systemds/scuro/representations/bert.py b/src/main/python/systemds/scuro/representations/bert.py index 8d8d40f4fd7..e6611f0b7c9 100644 --- a/src/main/python/systemds/scuro/representations/bert.py +++ b/src/main/python/systemds/scuro/representations/bert.py @@ -34,12 +34,13 @@ @register_representation(ModalityType.TEXT) class Bert(UnimodalRepresentation): - def __init__(self, model_name="bert", output_file=None): + def __init__(self, model_name="bert", output_file=None, max_seq_length=512): parameters = {"model_name": "bert"} self.model_name = model_name super().__init__("Bert", ModalityType.EMBEDDING, parameters) self.output_file = output_file + self.max_seq_length = max_seq_length def transform(self, modality): transformed_modality = TransformedModality(modality, self) @@ -59,28 +60,28 @@ def transform(self, modality): return transformed_modality def create_embeddings(self, modality, model, tokenizer): - embeddings = [] - for i, d in enumerate(modality.data): - inputs = tokenizer( - d, - return_offsets_mapping=True, - return_tensors="pt", - padding=True, - truncation=True, - ) - - ModalityType.TEXT.add_field( - list(modality.metadata.values())[i], - 
"token_to_character_mapping", - inputs.data["offset_mapping"][0].tolist(), - ) - - del inputs.data["offset_mapping"] - - with torch.no_grad(): - outputs = model(**inputs) - - cls_embedding = outputs.last_hidden_state[0].numpy() - embeddings.append(cls_embedding) - - return embeddings + inputs = tokenizer( + modality.data, + return_offsets_mapping=True, + return_tensors="pt", + padding="longest", + return_attention_mask=True, + truncation=True, + ) + ModalityType.TEXT.add_field_for_instances( + modality.metadata, + "token_to_character_mapping", + inputs.data["offset_mapping"].tolist(), + ) + + ModalityType.TEXT.add_field_for_instances( + modality.metadata, "attention_masks", inputs.data["attention_mask"].tolist() + ) + del inputs.data["offset_mapping"] + + with torch.no_grad(): + outputs = model(**inputs) + + cls_embedding = outputs.last_hidden_state.detach().numpy() + + return cls_embedding diff --git a/src/main/python/systemds/scuro/representations/mel_spectrogram.py b/src/main/python/systemds/scuro/representations/mel_spectrogram.py index 8c14c03ac60..8e897542b0c 100644 --- a/src/main/python/systemds/scuro/representations/mel_spectrogram.py +++ b/src/main/python/systemds/scuro/representations/mel_spectrogram.py @@ -46,19 +46,18 @@ def transform(self, modality): modality, self, self.output_modality_type ) result = [] - max_length = 0 + for i, sample in enumerate(modality.data): sr = list(modality.metadata.values())[i]["frequency"] S = librosa.feature.melspectrogram( - y=sample, + y=np.array(sample), sr=sr, n_mels=self.n_mels, hop_length=self.hop_length, n_fft=self.n_fft, - ) + ).astype(modality.data_type) S_dB = librosa.power_to_db(S, ref=np.max) - if S_dB.shape[-1] > max_length: - max_length = S_dB.shape[-1] + result.append(S_dB.T) transformed_modality.data = result diff --git a/src/main/python/systemds/scuro/representations/mfcc.py b/src/main/python/systemds/scuro/representations/mfcc.py index 234e93246fd..00f735a756e 100644 --- 
a/src/main/python/systemds/scuro/representations/mfcc.py +++ b/src/main/python/systemds/scuro/representations/mfcc.py @@ -48,20 +48,19 @@ def transform(self, modality): modality, self, self.output_modality_type ) result = [] - max_length = 0 + for i, sample in enumerate(modality.data): sr = list(modality.metadata.values())[i]["frequency"] mfcc = librosa.feature.mfcc( - y=sample, + y=np.array(sample), sr=sr, n_mfcc=self.n_mfcc, dct_type=self.dct_type, hop_length=self.hop_length, n_mels=self.n_mels, - ) + ).astype(modality.data_type) mfcc = (mfcc - np.mean(mfcc)) / np.std(mfcc) - if mfcc.shape[-1] > max_length: # TODO: check if this needs to be done - max_length = mfcc.shape[-1] + result.append(mfcc.T) transformed_modality.data = result diff --git a/src/main/python/systemds/scuro/representations/spectrogram.py b/src/main/python/systemds/scuro/representations/spectrogram.py index 6a713a3d21c..5fb1780536d 100644 --- a/src/main/python/systemds/scuro/representations/spectrogram.py +++ b/src/main/python/systemds/scuro/representations/spectrogram.py @@ -41,14 +41,13 @@ def transform(self, modality): modality, self, self.output_modality_type ) result = [] - max_length = 0 + for i, sample in enumerate(modality.data): spectrogram = librosa.stft( - y=sample, hop_length=self.hop_length, n_fft=self.n_fft - ) + y=np.array(sample), hop_length=self.hop_length, n_fft=self.n_fft + ).astype(modality.data_type) S_dB = librosa.amplitude_to_db(np.abs(spectrogram)) - if S_dB.shape[-1] > max_length: - max_length = S_dB.shape[-1] + result.append(S_dB.T) transformed_modality.data = result diff --git a/src/main/python/systemds/scuro/representations/tfidf.py b/src/main/python/systemds/scuro/representations/tfidf.py index 1df5a1fde08..c26039f46a9 100644 --- a/src/main/python/systemds/scuro/representations/tfidf.py +++ b/src/main/python/systemds/scuro/representations/tfidf.py @@ -43,8 +43,7 @@ def transform(self, modality): vectorizer = TfidfVectorizer(min_df=self.min_df) X = 
vectorizer.fit_transform(modality.data) - X = [np.array(x).reshape(1, -1) for x in X.toarray()] - + X = [np.array(x).astype(np.float32).reshape(1, -1) for x in X.toarray()] if self.output_file is not None: save_embeddings(X, self.output_file) diff --git a/src/main/python/systemds/scuro/representations/wav2vec.py b/src/main/python/systemds/scuro/representations/wav2vec.py index 29f5bcbea02..86145e3769e 100644 --- a/src/main/python/systemds/scuro/representations/wav2vec.py +++ b/src/main/python/systemds/scuro/representations/wav2vec.py @@ -52,7 +52,9 @@ def transform(self, modality): result = [] for i, sample in enumerate(modality.data): sr = list(modality.metadata.values())[i]["frequency"] - audio_resampled = librosa.resample(sample, orig_sr=sr, target_sr=16000) + audio_resampled = librosa.resample( + np.array(sample), orig_sr=sr, target_sr=16000 + ) input = self.processor( audio_resampled, sampling_rate=16000, return_tensors="pt", padding=True ) diff --git a/src/main/python/systemds/scuro/representations/window_aggregation.py b/src/main/python/systemds/scuro/representations/window_aggregation.py index d17c703721b..167f4adafea 100644 --- a/src/main/python/systemds/scuro/representations/window_aggregation.py +++ b/src/main/python/systemds/scuro/representations/window_aggregation.py @@ -21,7 +21,7 @@ import numpy as np import math -from systemds.scuro.modality.type import DataLayout +from systemds.scuro.modality.type import DataLayout, ModalityType from systemds.scuro.drsearch.operator_registry import register_context_operator from systemds.scuro.representations.aggregate import Aggregation @@ -30,7 +30,7 @@ @register_context_operator() class WindowAggregation(Context): - def __init__(self, window_size=10, aggregation_function="mean"): + def __init__(self, window_size=10, aggregation_function="mean", pad=True): parameters = { "window_size": [window_size], "aggregation_function": list(Aggregation().get_aggregation_functions()), @@ -38,6 +38,7 @@ def __init__(self, 
window_size=10, aggregation_function="mean"): super().__init__("WindowAggregation", parameters) self.window_size = window_size self.aggregation_function = aggregation_function + self.pad = pad @property def aggregation_function(self): @@ -49,6 +50,7 @@ def aggregation_function(self, value): def execute(self, modality): windowed_data = [] + original_lengths = [] for instance in modality.data: new_length = math.ceil(len(instance) / self.window_size) if modality.get_data_layout() == DataLayout.SINGLE_LEVEL: @@ -59,14 +61,53 @@ def execute(self, modality): windowed_instance = self.window_aggregate_nested_level( instance, new_length ) - + original_lengths.append(new_length) windowed_data.append(windowed_instance) + if self.pad and not isinstance(windowed_data, np.ndarray): + target_length = max(original_lengths) + sample_shape = windowed_data[0].shape + is_1d = len(sample_shape) == 1 + + padded_features = [] + for i, features in enumerate(windowed_data): + current_len = original_lengths[i] + + if current_len < target_length: + padding_needed = target_length - current_len + + if is_1d: + padding = np.zeros(padding_needed) + padded = np.concatenate([features, padding]) + else: + feature_dim = features.shape[-1] + padding = np.zeros((padding_needed, feature_dim)) + padded = np.concatenate([features, padding], axis=0) + + padded_features.append(padded) + else: + padded_features.append(features) + + attention_masks = np.zeros((len(windowed_data), target_length)) + for i, length in enumerate(original_lengths): + actual_length = min(length, target_length) + attention_masks[i, :actual_length] = 1 + + ModalityType(modality.modality_type).add_field_for_instances( + modality.metadata, "attention_masks", attention_masks + ) + + windowed_data = np.array(padded_features) + data_type = list(modality.metadata.values())[0]["data_layout"]["type"] + if data_type != "str": + windowed_data = windowed_data.astype(data_type) + return windowed_data def window_aggregate_single_level(self, 
instance, new_length): if isinstance(instance, str): return instance + instance = np.array(instance) num_cols = instance.shape[1] if instance.ndim > 1 else 1 result = np.empty((new_length, num_cols)) for i in range(0, new_length): diff --git a/src/main/python/systemds/scuro/representations/word2vec.py b/src/main/python/systemds/scuro/representations/word2vec.py index 0210207a013..aa28499e636 100644 --- a/src/main/python/systemds/scuro/representations/word2vec.py +++ b/src/main/python/systemds/scuro/representations/word2vec.py @@ -65,7 +65,9 @@ def transform(self, modality): embeddings = [] for sentences in modality.data: tokens = list(tokenize(sentences.lower())) - embeddings.append(np.array(get_embedding(tokens, model)).reshape(1, -1)) + embeddings.append( + np.array(get_embedding(tokens, model)).reshape(1, -1).astype(np.float32) + ) if self.output_file is not None: save_embeddings(np.array(embeddings), self.output_file) diff --git a/src/main/python/systemds/scuro/utils/static_variables.py b/src/main/python/systemds/scuro/utils/static_variables.py index b1733387160..b1b3e657a24 100644 --- a/src/main/python/systemds/scuro/utils/static_variables.py +++ b/src/main/python/systemds/scuro/utils/static_variables.py @@ -1,7 +1,16 @@ import numpy as np +import torch global_rng = np.random.default_rng(42) def get_seed(): return global_rng.integers(0, 1024) + + +def get_device(): + return torch.device( + "cuda:0" + if torch.cuda.is_available() + else "mps" if torch.mps.is_available() else "cpu" + ) diff --git a/src/main/python/tests/scuro/data_generator.py b/src/main/python/tests/scuro/data_generator.py index fbb50ac180e..e2dceec329d 100644 --- a/src/main/python/tests/scuro/data_generator.py +++ b/src/main/python/tests/scuro/data_generator.py @@ -93,10 +93,14 @@ def create1DModality( self.modality_id += 1 return tf_modality - def create_audio_data(self, num_instances, num_features): - data = np.random.rand(num_instances, num_features).astype(np.float32) + def 
create_audio_data(self, num_instances, max_audio_length): + data = [ + [random.random() for _ in range(random.randint(1, max_audio_length))] + for _ in range(num_instances) + ] + metadata = { - i: ModalityType.AUDIO.create_audio_metadata(16000, data[i]) + i: ModalityType.AUDIO.create_audio_metadata(16000, np.array(data[i])) for i in range(num_instances) } @@ -165,26 +169,29 @@ def create_text_data(self, num_instances): return sentences, metadata - def create_visual_modality(self, num_instances, num_frames=1, height=28, width=28): - if num_frames == 1: + def create_visual_modality( + self, num_instances, max_num_frames=1, height=28, width=28 + ): + data = [ + np.random.randint( + 0, + 256, + (np.random.randint(5, max_num_frames + 1), height, width, 3), + dtype=np.uint8, + ) + for _ in range(num_instances) + ] + if max_num_frames == 1: print(f"TODO: create image metadata") else: metadata = { i: ModalityType.VIDEO.create_video_metadata( - 30, num_frames, width, height, 1 + 30, data[i].shape[0], width, height, 3 ) for i in range(num_instances) } - return ( - np.random.randint( - 0, - 256, - (num_instances, num_frames, height, width), - # ).astype(np.float16).tolist(), - ).astype(np.float16), - metadata, - ) + return (data, metadata) def setup_data(modalities, num_instances, path): diff --git a/src/main/python/tests/scuro/test_multimodal_join.py b/src/main/python/tests/scuro/test_multimodal_join.py index 9e3a16ffcad..5fd22dc8d98 100644 --- a/src/main/python/tests/scuro/test_multimodal_join.py +++ b/src/main/python/tests/scuro/test_multimodal_join.py @@ -20,7 +20,6 @@ # TODO: Test edge cases: unequal number of audio-video timestamps (should still work and add the average over all audio/video samples) -import shutil import unittest import numpy as np @@ -30,9 +29,6 @@ from systemds.scuro.representations.mel_spectrogram import MelSpectrogram from systemds.scuro.representations.resnet import ResNet from tests.scuro.data_generator import TestDataLoader, 
ModalityRandomDataGenerator - -from systemds.scuro.dataloader.audio_loader import AudioLoader -from systemds.scuro.dataloader.video_loader import VideoLoader from systemds.scuro.modality.type import ModalityType diff --git a/src/main/python/tests/scuro/test_unimodal_optimizer.py b/src/main/python/tests/scuro/test_unimodal_optimizer.py index 41bd2af1367..192567e92ee 100644 --- a/src/main/python/tests/scuro/test_unimodal_optimizer.py +++ b/src/main/python/tests/scuro/test_unimodal_optimizer.py @@ -182,23 +182,17 @@ def optimize_unimodal_representation_for_modality(self, modality): ): registry = Registry() - unimodal_optimizer = UnimodalOptimizer( - [modality], self.tasks - ) + unimodal_optimizer = UnimodalOptimizer([modality], self.tasks) unimodal_optimizer.optimize() assert ( - list(unimodal_optimizer.operator_performance.keys())[0] + unimodal_optimizer.operator_performance.modality_ids[0] == modality.modality_id ) - assert len(list(unimodal_optimizer.operator_performance.values())[0]) == 2 + assert len(unimodal_optimizer.operator_performance.task_names) == 2 assert ( - len( - unimodal_optimizer.get_k_best_results(modality, 1, self.tasks[0])[ - 0 - ].representations - ) - >= 1 + len(unimodal_optimizer.get_k_best_results(modality, 1, self.tasks[0])) + == 1 ) From 1c37f2e8645a149f79b434a701167814d4230ab3 Mon Sep 17 00:00:00 2001 From: Christina Dionysio Date: Thu, 7 Aug 2025 16:59:20 +0200 Subject: [PATCH 13/19] add cache --- .../scuro/drsearch/multimodal_optimizer.py | 124 ++++++++++++++---- .../scuro/drsearch/unimodal_optimizer.py | 95 ++++++++------ .../systemds/scuro/modality/modality.py | 20 +-- .../scuro/representations/word2vec.py | 4 +- 4 files changed, 166 insertions(+), 77 deletions(-) diff --git a/src/main/python/systemds/scuro/drsearch/multimodal_optimizer.py b/src/main/python/systemds/scuro/drsearch/multimodal_optimizer.py index e6d7abd25c3..ff5c431aa36 100644 --- a/src/main/python/systemds/scuro/drsearch/multimodal_optimizer.py +++ 
b/src/main/python/systemds/scuro/drsearch/multimodal_optimizer.py @@ -1,3 +1,5 @@ +import itertools + from systemds.scuro.representations.aggregated_representation import ( AggregatedRepresentation, ) @@ -11,13 +13,17 @@ class MultimodalOptimizer: - def __init__(self, modalities, unimodal_optimizer, tasks, k=2, debug=True): + def __init__( + self, modalities, unimodal_optimization_results, tasks, k=2, debug=True + ): + self.k_best_cache = None self.k_best_modalities = None self.modalities = modalities - self.unimodal_optimizer = unimodal_optimizer + self.unimodal_optimization_results = unimodal_optimization_results self.tasks = tasks self.k = k self.extract_k_best_modalities_per_task() + self.operator_registry = Registry() self.optimization_results = MultimodalResults( modalities, tasks, debug, self.k_best_modalities @@ -26,7 +32,7 @@ def __init__(self, modalities, unimodal_optimizer, tasks, k=2, debug=True): def optimize(self): for task in self.tasks: - self.optimize_intramodal_representations(task) + # self.optimize_intramodal_representations(task) self.optimize_intermodal_representations(task) def optimize_intramodal_representations(self, task): @@ -35,7 +41,7 @@ def optimize_intramodal_representations(self, task): modality.modality_id ] applied_representations = self.extract_representations( - representations, modality + representations, modality, task.model.name ) for i in range(1, len(applied_representations)): @@ -73,33 +79,76 @@ def optimize_intramodal_representations(self, task): ) def optimize_intermodal_representations(self, task): - pass + modality_combos = [] + n = len(self.k_best_cache[task.model.name]) + + # Generate combinations in depth-first order + def generate_extensions(current_combo, remaining_indices): + # Add current combination if it has at least 2 elements + if len(current_combo) >= 2: + combo_tuple = tuple(i for i in current_combo) + modality_combos.append(combo_tuple) + # Generate all possible extensions + for i in remaining_indices: + 
new_combo = current_combo + [i] + new_remaining = [j for j in remaining_indices if j > i] + generate_extensions(new_combo, new_remaining) + + # Start with each possible first element + for start_idx in range(n): + remaining = list(range(start_idx + 1, n)) + generate_extensions([start_idx], remaining) - def extract_representations(self, representations, modality): + print(modality_combos) + + def _evaluate_inter_modal(self, task, modality_combo): + fused_representation = None + for modality_id in modality_combo: + fused_representation = self.evaluate(task, modality_id, None, None, None) + + def extract_representations(self, representations, modality, task_name): applied_representations = [] for i in range(0, len(representations)): - applied_representation = modality - for j, rep in enumerate(representations[i].representations): - representation, is_context = ( - self.operator_registry.get_representation_by_name( - rep, modality.modality_type - ) + cache_key = ( + tuple(representations[i].representations), + representations[i].task_time, + representations[i].representation_time, + ) + if ( + cache_key + in self.unimodal_optimization_results.cache[modality.modality_id][ + task_name + ] + ): + applied_representations.append( + self.unimodal_optimization_results.cache[modality.modality_id][ + task_name + ][cache_key] ) - if representation is None: - if rep == AggregatedRepresentation.__name__: - representation = AggregatedRepresentation(Aggregation()) - else: - representation = representation() - representation.set_parameters(representations[i].params[j]) - if is_context: - applied_representation = applied_representation.context( - representation - ) - else: - applied_representation = ( - applied_representation.apply_representation(representation) + else: + applied_representation = modality + for j, rep in enumerate(representations[i].representations): + representation, is_context = ( + self.operator_registry.get_representation_by_name( + rep, modality.modality_type + ) 
) - applied_representations.append(applied_representation) + if representation is None: + if rep == AggregatedRepresentation.__name__: + representation = AggregatedRepresentation(Aggregation()) + else: + representation = representation() + representation.set_parameters(representations[i].params[j]) + if is_context: + applied_representation = applied_representation.context( + representation + ) + else: + applied_representation = ( + applied_representation.apply_representation(representation) + ) + self.k_best_cache[task_name].append(applied_representation) + applied_representations.append(applied_representation) return applied_representations def evaluate(self, task, modality, representations, fusion, modality_ids): @@ -127,13 +176,32 @@ def add_to_cache(self, result_idx, combined_modality): def extract_k_best_modalities_per_task(self): self.k_best_modalities = {} + self.k_best_cache = {} for task in self.tasks: self.k_best_modalities[task.model.name] = {} + self.k_best_cache[task.model.name] = [] for modality in self.modalities: - self.k_best_modalities[task.model.name][modality.modality_id] = ( - self.unimodal_optimizer.get_k_best_results(modality, self.k, task) + k_best_results, cached_data = ( + self.unimodal_optimization_results.get_k_best_results( + modality, self.k, task + ) ) + self.k_best_modalities[task.model.name][ + modality.modality_id + ] = k_best_results + self.k_best_cache[task.model.name].extend(cached_data) + + def create_modality_index(self, task): + counter = 0 + k_best_idx = [] + for modality_id, values in self.k_best_modalities[task].items(): + for _ in values: + k_best_idx.append((modality_id, counter)) + counter += 1 + + return k_best_idx + class MultimodalResults: def __init__(self, modalities, tasks, debug, k_best_modalities): diff --git a/src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py b/src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py index 21dce69840c..b6f75162c00 100644 --- 
a/src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py +++ b/src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py @@ -21,9 +21,12 @@ import pickle import time from concurrent.futures import ProcessPoolExecutor, as_completed -from dataclasses import dataclass +from dataclasses import dataclass, field, asdict import multiprocessing as mp +from typing import Union + +import numpy as np from systemds.scuro.representations.window_aggregation import WindowAggregation from build.lib.systemds.scuro.representations.aggregated_representation import ( @@ -60,28 +63,13 @@ def store_results(self, file_name=None): with open(file_name, "wb") as f: pickle.dump(self.operator_performance, f) - def get_k_best_results(self, modality, k, task): - """ - Get the k best results for the given modality - :param modality: modality to get the best results for - :param k: number of best results - """ - - results = sorted( - self.operator_performance.results[modality.modality_id][task.model.name], - key=lambda x: x.val_score, - reverse=True, - )[:k] - - return results - def optimize_parallel(self, n_workers=None): if n_workers is None: n_workers = min(len(self.modalities), mp.cpu_count()) with ProcessPoolExecutor(max_workers=n_workers) as executor: future_to_modality = { - executor.submit(self._process_modality, modality): modality + executor.submit(self._process_modality, modality, True): modality for modality in self.modalities } @@ -95,13 +83,17 @@ def optimize_parallel(self, n_workers=None): def optimize(self): for modality in self.modalities: - local_result = self._process_modality(modality) - self._merge_results(local_result) + local_result = self._process_modality(modality, False) + # self._merge_results(local_result) + + def _process_modality(self, modality, parallel): + if parallel: + local_results = UnimodalResults( + modalities=[modality], tasks=self.tasks, debug=False + ) + else: + local_results = self.operator_performance - def _process_modality(self, modality): - 
local_results = UnimodalResults( - modalities=[modality], tasks=self.tasks, debug=False - ) context_operators = self.operator_registry.get_context_operators() for context_operator in context_operators: @@ -148,6 +140,11 @@ def _merge_results(self, local_results): local_results.results[modality_id][task_name] ) + for modality in self.modalities: + for task_name in local_results.cache[modality]: + for key, value in local_results.cache[modality][task_name].items(): + self.operator_performance.cache[modality][task_name][key] = value + def _evaluate_local(self, modality, representations, local_results): if self._tasks_require_same_dims: if self.expected_dimensions == 1 and get_shape(modality.metadata) > 1: @@ -165,9 +162,8 @@ def _evaluate_local(self, modality, representations, local_results): local_results.add_result( scores, reps, - modality.modality_id, + modality, task.model.name, - modality.transform_time, end - start, ) else: @@ -179,9 +175,8 @@ def _evaluate_local(self, modality, representations, local_results): local_results.add_result( scores, representations, - modality.modality_id, + modality, task.model.name, - modality.transform_time, end - start, ) else: @@ -200,9 +195,8 @@ def _evaluate_local(self, modality, representations, local_results): local_results.add_result( scores, reps, - modality.modality_id, + modality, task.model.name, - modality.transform_time, end - start, ) else: @@ -213,9 +207,8 @@ def _evaluate_local(self, modality, representations, local_results): local_results.add_result( scores, representations, - modality.modality_id, + modality, task.model.name, - modality.transform_time, end - start, ) @@ -226,15 +219,16 @@ def __init__(self, modalities, tasks, debug=False): self.task_names = [task.model.name for task in tasks] self.results = {} self.debug = debug + self.cache = {} for modality in self.modality_ids: self.results[modality] = {} + self.cache[modality] = {} for task_name in self.task_names: + self.cache[modality][task_name] = {} 
self.results[modality][task_name] = [] - def add_result( - self, scores, representations, modality_id, task_name, rep_time, task_time - ): + def add_result(self, scores, representations, modality, task_name, task_time): parameters = [] representation_names = [] @@ -260,13 +254,16 @@ def add_result( params=parameters, train_score=scores[0], val_score=scores[1], - representation_time=rep_time, + representation_time=modality.transform_time, task_time=task_time, ) - self.results[modality_id][task_name].append(entry) + self.results[modality.modality_id][task_name].append(entry) + self.cache[modality.modality_id][task_name][ + (tuple(representation_names), scores[1], modality.transform_time) + ] = modality if self.debug: - print(f"{modality_id}_{task_name}: {entry}") + print(f"{modality.modality_id}_{task_name}: {entry}") def print_results(self): for modality in self.modality_ids: @@ -274,8 +271,30 @@ def print_results(self): for entry in self.results[modality][task_name]: print(f"{modality}_{task_name}: {entry}") + def get_k_best_results(self, modality, k, task): + """ + Get the k best results for the given modality + :param modality: modality to get the best results for + :param k: number of best results + """ + items = self.results[modality.modality_id][task.model.name] + sorted_indices = sorted( + range(len(items)), key=lambda x: items[x].val_score, reverse=True + )[:k] + + results = sorted( + self.results[modality.modality_id][task.model.name], + key=lambda x: x.val_score, + reverse=True, + )[:k] + + items = list(self.cache[modality.modality_id][task.model.name].items()) + reordered_cache = [items[i][1] for i in sorted_indices] + + return results, list(reordered_cache) + -@dataclass +@dataclass(frozen=True) class ResultEntry: val_score: float representations: list diff --git a/src/main/python/systemds/scuro/modality/modality.py b/src/main/python/systemds/scuro/modality/modality.py index e4240690f18..1af88282f76 100644 --- 
a/src/main/python/systemds/scuro/modality/modality.py +++ b/src/main/python/systemds/scuro/modality/modality.py @@ -93,34 +93,38 @@ def update_metadata(self): updated_md = self.modality_type.update_metadata(md_v, self.data[i]) self.metadata[md_k] = updated_md - def flatten(self, padding=True): + def flatten(self, padding=False): """ Flattens modality data by row-wise concatenation Prerequisite for some ML-models """ max_len = 0 + data = [] for num_instance, instance in enumerate(self.data): if type(instance) is np.ndarray: - self.data[num_instance] = instance.flatten() + d = instance.flatten() + max_len = max(max_len, len(d)) + data.append(d) elif isinstance(instance, List): - self.data[num_instance] = np.array( + d = np.array( [item for sublist in instance for item in sublist] ).flatten() - max_len = max(max_len, len(self.data[num_instance])) + max_len = max(max_len, len(d)) + data.append(d) if padding: - for i, instance in enumerate(self.data): + for i, instance in enumerate(data): if isinstance(instance, np.ndarray): if len(instance) < max_len: padded_data = np.zeros(max_len, dtype=instance.dtype) padded_data[: len(instance)] = instance - self.data[i] = padded_data + data[i] = padded_data else: padded_data = [] for entry in instance: padded_data.append(utils.pad_sequences(entry, max_len)) - self.data[i] = padded_data - self.data = np.array(self.data) + data[i] = padded_data + self.data = np.array(data) return self def pad(self, value=0): diff --git a/src/main/python/systemds/scuro/representations/word2vec.py b/src/main/python/systemds/scuro/representations/word2vec.py index aa28499e636..8543379bc14 100644 --- a/src/main/python/systemds/scuro/representations/word2vec.py +++ b/src/main/python/systemds/scuro/representations/word2vec.py @@ -65,9 +65,7 @@ def transform(self, modality): embeddings = [] for sentences in modality.data: tokens = list(tokenize(sentences.lower())) - embeddings.append( - np.array(get_embedding(tokens, model)).reshape(1, 
-1).astype(np.float32) - ) + embeddings.append(np.array(get_embedding(tokens, model)).astype(np.float32)) if self.output_file is not None: save_embeddings(np.array(embeddings), self.output_file) From 74b304e58f696bf8aa3463c1e3b48c9ce6b4c02c Mon Sep 17 00:00:00 2001 From: Christina Dionysio Date: Fri, 15 Aug 2025 10:57:44 +0200 Subject: [PATCH 14/19] add first version of multimodal optimizer --- .../scuro/drsearch/multimodal_optimizer.py | 148 ++++++++++++------ .../scuro/drsearch/unimodal_optimizer.py | 4 +- .../systemds/scuro/modality/modality.py | 13 +- .../systemds/scuro/modality/transformed.py | 18 ++- 4 files changed, 130 insertions(+), 53 deletions(-) diff --git a/src/main/python/systemds/scuro/drsearch/multimodal_optimizer.py b/src/main/python/systemds/scuro/drsearch/multimodal_optimizer.py index ff5c431aa36..7502cf07a72 100644 --- a/src/main/python/systemds/scuro/drsearch/multimodal_optimizer.py +++ b/src/main/python/systemds/scuro/drsearch/multimodal_optimizer.py @@ -32,7 +32,6 @@ def __init__( def optimize(self): for task in self.tasks: - # self.optimize_intramodal_representations(task) self.optimize_intermodal_representations(task) def optimize_intramodal_representations(self, task): @@ -82,29 +81,92 @@ def optimize_intermodal_representations(self, task): modality_combos = [] n = len(self.k_best_cache[task.model.name]) - # Generate combinations in depth-first order def generate_extensions(current_combo, remaining_indices): # Add current combination if it has at least 2 elements if len(current_combo) >= 2: combo_tuple = tuple(i for i in current_combo) modality_combos.append(combo_tuple) - # Generate all possible extensions + for i in remaining_indices: new_combo = current_combo + [i] new_remaining = [j for j in remaining_indices if j > i] generate_extensions(new_combo, new_remaining) - # Start with each possible first element for start_idx in range(n): remaining = list(range(start_idx + 1, n)) generate_extensions([start_idx], remaining) + fusion_methods =
self.operator_registry.get_fusion_operators() + fused_representations = [] + reuse_fused_representations = False + for i, modality_combo in enumerate(modality_combos): + if i != 0: + reuse_fused_representations = self.is_prefix_match( + modality_combos[i], modality_combo + ) + if self.debug: + print( + f"New modality combo: {modality_combo} - Reuse: {reuse_fused_representations} - # fused reps: {len(fused_representations)}" + ) + if reuse_fused_representations: + mods = [ + self.k_best_cache[task.model.name][mod_idx] + for mod_idx in modality_combo[len(modality_combos[i - 1]) :] + ] + all_mods = [ + self.k_best_cache[task.model.name][mod_idx] + for mod_idx in modality_combo + ] + temp_fused_reps = [] + for j, fusion_method in enumerate(fusion_methods): + # Evaluate all mods + fused_rep = all_mods[0].combine(all_mods[1:], fusion_method()) + temp_fused_reps.append(fused_rep) + self.evaluate( + task, + fused_rep, + [ + self.k_best_modalities[task.model.name][k].representations + for k in modality_combo + ], + fusion_method, + modality_combo, + ) + if reuse_fused_representations: + for fused_representation in fused_representations: + fused_rep = fused_representation.combine(mods, fusion_method()) + temp_fused_reps.append(fused_rep) + self.evaluate( + task, + fused_rep, + [ + self.k_best_modalities[task.model.name][ + k + ].representations + for k in modality_combo + ], + fusion_method, + modality_combo, + ) + fused_representations = temp_fused_reps + reuse_fused_representations = False + + def is_prefix_match(self, seq1, seq2): + """ + Check if seq1 is a prefix of seq2. 
- print(modality_combos) + Args: + seq1: First sequence (list) + seq2: Second sequence (list) - def _evaluate_inter_modal(self, task, modality_combo): - fused_representation = None - for modality_id in modality_combo: - fused_representation = self.evaluate(task, modality_id, None, None, None) + Returns: + Boolean indicating whether seq1 is a prefix of seq2 + """ + # seq1 can only be a prefix if it's not longer than seq2 + if len(seq1) > len(seq2): + return False + + # Check if seq1 matches the beginning of seq2 + return seq2[: len(seq1)] == seq1 def extract_representations(self, representations, modality, task_name): applied_representations = [] @@ -151,10 +213,9 @@ def extract_representations(self, representations, modality, task_name): applied_representations.append(applied_representation) return applied_representations - def evaluate(self, task, modality, representations, fusion, modality_ids): + def evaluate(self, task, modality, representations, fusion, modality_combo): if task.expected_dim == 1 and get_shape(modality.metadata) > 1: for aggregation in Aggregation().get_aggregation_functions(): - # padding should not be necessary here agg_operator = AggregatedRepresentation(Aggregation(aggregation, False)) agg_modality = agg_operator.transform(modality) @@ -163,12 +224,20 @@ def evaluate(self, task, modality, representations, fusion, modality_ids): reps.append(agg_operator) self.optimization_results.add_result( - scores, reps, [fusion], modality_ids, task.model.name + scores, + reps, + modality.transformation, + modality_combo, + task.model.name, ) else: scores = task.run(modality.data) self.optimization_results.add_result( - scores, representations, [fusion], modality_ids, task.model.name + scores, + representations, + modality.transformation, + modality_combo, + task.model.name, ) def add_to_cache(self, result_idx, combined_modality): @@ -178,7 +247,7 @@ def extract_k_best_modalities_per_task(self): self.k_best_modalities = {} self.k_best_cache = {} for task 
in self.tasks: - self.k_best_modalities[task.model.name] = {} + self.k_best_modalities[task.model.name] = [] self.k_best_cache[task.model.name] = [] for modality in self.modalities: k_best_results, cached_data = ( @@ -187,21 +256,9 @@ def extract_k_best_modalities_per_task(self): ) ) - self.k_best_modalities[task.model.name][ - modality.modality_id - ] = k_best_results + self.k_best_modalities[task.model.name].extend(k_best_results) self.k_best_cache[task.model.name].extend(cached_data) - def create_modality_index(self, task): - counter = 0 - k_best_idx = [] - for modality_id, values in self.k_best_modalities[task].items(): - for _ in values: - k_best_idx.append((modality_id, counter)) - counter += 1 - - return k_best_idx - class MultimodalResults: def __init__(self, modalities, tasks, debug, k_best_modalities): @@ -215,35 +272,36 @@ def __init__(self, modalities, tasks, debug, k_best_modalities): self.results[task.model.name] = {} def add_result( - self, scores, best_representation_idx, fusion_methods, modality_ids, task_name + self, scores, best_representation_idx, fusion_methods, modality_combo, task_name ): entry = MultimodalResultEntry( representations=best_representation_idx, train_score=scores[0], val_score=scores[1], - fusion_methods=[fusion_method.__name__ for fusion_method in fusion_methods], - modality_ids=modality_ids, + fusion_methods=[ + fusion_method.__class__.__name__ for fusion_method in fusion_methods + ], + modality_combo=modality_combo, task=task_name, ) - modality_id_strings = "_".join(list(map(str, modality_ids))) + modality_id_strings = "_".join(list(map(str, modality_combo))) if not modality_id_strings in self.results[task_name]: self.results[task_name][modality_id_strings] = [] self.results[task_name][modality_id_strings].append(entry) + if self.debug: + print(f"{modality_id_strings}_{task_name}: {entry}") + def print_results(self): for task_name in self.task_names: for modality in self.results[task_name].keys(): for entry in 
self.results[task_name][modality]: reps = [] - for i, mod_idx in enumerate(entry.modality_ids): - reps.append( - self.k_best_modalities[task_name][mod_idx][ - entry.representations[i] - ] - ) + for i, mod_idx in enumerate(entry.modality_combo): + reps.append(self.k_best_modalities[task_name][mod_idx]) print( f"{modality}_{task_name}: " @@ -251,22 +309,18 @@ def print_results(self): ) for i, rep in enumerate(reps): print( - f" Representation: {entry.modality_ids[i]} - {rep.representations}" + f" Representation: {entry.modality_combo[i]} - {rep.representations}" ) - if i < len(reps) - 1: - print(f" Fusion: {entry.fusion_methods[i]} ") + # if i < len(reps) - 1: + print(f" Fusion: {entry.fusion_methods[0]} ") def store_results(self, file_name=None): for task_name in self.task_names: for modality in self.results[task_name].keys(): for entry in self.results[task_name][modality]: reps = [] - for i, mod_idx in enumerate(entry.modality_ids): - reps.append( - self.k_best_modalities[task_name][mod_idx][ - entry.representations[i] - ] - ) + for i, mod_idx in enumerate(entry.modality_combo): + reps.append(self.k_best_modalities[task_name][mod_idx]) entry.representations = reps import pickle @@ -284,7 +338,7 @@ def store_results(self, file_name=None): @dataclasses.dataclass class MultimodalResultEntry: val_score: float - modality_ids: list + modality_combo: list representations: list fusion_methods: list train_score: float diff --git a/src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py b/src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py index b6f75162c00..1e114bb34ee 100644 --- a/src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py +++ b/src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py @@ -61,7 +61,7 @@ def store_results(self, file_name=None): file_name = "unimodal_optimizer" + timestr + ".pkl" with open(file_name, "wb") as f: - pickle.dump(self.operator_performance, f) + pickle.dump(self.operator_performance.results, f) def 
optimize_parallel(self, n_workers=None): if n_workers is None: @@ -167,7 +167,7 @@ def _evaluate_local(self, modality, representations, local_results): end - start, ) else: - # modality.pad() + modality.pad() for task in self.tasks: start = time.time() scores = task.run(modality.data) diff --git a/src/main/python/systemds/scuro/modality/modality.py b/src/main/python/systemds/scuro/modality/modality.py index 1af88282f76..94e745b2cc1 100644 --- a/src/main/python/systemds/scuro/modality/modality.py +++ b/src/main/python/systemds/scuro/modality/modality.py @@ -22,6 +22,7 @@ from typing import List import numpy as np +from numpy.f2py.auxfuncs import throw_error from systemds.scuro.modality.type import ModalityType from systemds.scuro.representations import utils @@ -127,17 +128,23 @@ def flatten(self, padding=False): self.data = np.array(data) return self - def pad(self, value=0): + def pad(self, value=0, max_len=None): try: - result = np.array(self.data) + if max_len is None: + result = np.array(self.data) + else: + raise "Needs padding to max_len" except: - maxlen = max([len(seq) for seq in self.data]) + maxlen = ( + max([len(seq) for seq in self.data]) if max_len is None else max_len + ) result = np.full((len(self.data), maxlen), value, dtype=self.data_type) for i, seq in enumerate(self.data): data = seq[:maxlen] result[i, : len(data)] = data + # TODO: add padding to metadata as attention_masks self.data = result diff --git a/src/main/python/systemds/scuro/modality/transformed.py b/src/main/python/systemds/scuro/modality/transformed.py index 63ca10251c4..6523e9502fc 100644 --- a/src/main/python/systemds/scuro/modality/transformed.py +++ b/src/main/python/systemds/scuro/modality/transformed.py @@ -27,6 +27,7 @@ from systemds.scuro.modality.modality import Modality from systemds.scuro.representations.window_aggregation import WindowAggregation import time +import copy class TransformedModality(Modality): @@ -44,7 +45,22 @@ def __init__(self, modality, transformation, 
new_modality_type=None): super().__init__( new_modality_type, modality.modality_id, metadata, modality.data_type ) - self.transformation = transformation + self.transformation = None + self.add_transformation(transformation, modality) + + def add_transformation(self, transformation, modality): + if ( + transformation.__class__.__bases__[0].__name__ == "Fusion" + and modality.transformation[0].__class__.__bases__[0].__name__ != "Fusion" + ): + self.transformation = [] + else: + self.transformation = ( + [] + if type(modality).__name__ != "TransformedModality" + else copy.deepcopy(modality.transformation) + ) + self.transformation.append(transformation) def copy_from_instance(self): return type(self)(self, self.transformation) From 3a8f78f055373726d3e532e22a2771c6bceeda8c Mon Sep 17 00:00:00 2001 From: Christina Dionysio Date: Fri, 15 Aug 2025 14:36:46 +0200 Subject: [PATCH 15/19] add caching --- .../scuro/drsearch/multimodal_optimizer.py | 40 ++++++++++++++++--- 1 file changed, 34 insertions(+), 6 deletions(-) diff --git a/src/main/python/systemds/scuro/drsearch/multimodal_optimizer.py b/src/main/python/systemds/scuro/drsearch/multimodal_optimizer.py index 7502cf07a72..0a4dd0c3489 100644 --- a/src/main/python/systemds/scuro/drsearch/multimodal_optimizer.py +++ b/src/main/python/systemds/scuro/drsearch/multimodal_optimizer.py @@ -23,6 +23,7 @@ def __init__( self.tasks = tasks self.k = k self.extract_k_best_modalities_per_task() + self.debug = debug self.operator_registry = Registry() self.optimization_results = MultimodalResults( @@ -80,6 +81,7 @@ def optimize_intramodal_representations(self, task): def optimize_intermodal_representations(self, task): modality_combos = [] n = len(self.k_best_cache[task.model.name]) + reuse_cache = {} def generate_extensions(current_combo, remaining_indices): # Add current combination if it has at least 2 elements @@ -99,19 +101,33 @@ def generate_extensions(current_combo, remaining_indices): fused_representations = [] 
reuse_fused_representations = False for i, modality_combo in enumerate(modality_combos): + # clear reuse cache + if i != 0: reuse_fused_representations = self.is_prefix_match( - modality_combos[i], modality_combo - ) - if self.debug: - print( - f"New modality combo: {modality_combo} - Reuse: {reuse_fused_representations} - # fused reps: {len(fused_representations)}" + modality_combos[i-1], modality_combo ) if reuse_fused_representations: mods = [ self.k_best_cache[task.model.name][mod_idx] for mod_idx in modality_combo[len(modality_combos[i - 1]) :] ] + fused_representations = reuse_cache[modality_combos[i - 1]] + else: + prefix_idx = self.compute_equal_prefix_index(modality_combos[i-1], modality_combo) + if prefix_idx > 1: + fused_representations = reuse_cache[modality_combos[i - 1][:prefix_idx]] + reuse_fused_representations = True + mods = [ + self.k_best_cache[task.model.name][mod_idx] + for mod_idx in modality_combo[prefix_idx:] + ] + if self.debug: + print( + f"New modality combo: {modality_combo} - Reuse: {reuse_fused_representations} - # fused reps: {len(fused_representations)}" + ) + + all_mods = [ self.k_best_cache[task.model.name][mod_idx] for mod_idx in modality_combo @@ -147,7 +163,9 @@ def generate_extensions(current_combo, remaining_indices): fusion_method, modality_combo, ) - fused_representations = temp_fused_reps + + if len(modality_combo) < len(self.k_best_cache[task.model.name]) and i +1 < len(modality_combos) and self.is_prefix_match(modality_combos[i], modality_combos[i+1]): + reuse_cache[modality_combo] = temp_fused_reps reuse_fused_representations = False def is_prefix_match(self, seq1, seq2): @@ -162,11 +180,21 @@ def is_prefix_match(self, seq1, seq2): Boolean indicating whether seq1 is a prefix of seq2 """ # seq1 can only be a prefix if it's not longer than seq2 + if len(seq1) > len(seq2): return False # Check if seq1 matches the beginning of seq2 return seq2[: len(seq1)] == seq1 + + + def compute_equal_prefix_index(self, seq1, seq2): + 
max_len = min(len(seq1), len(seq2)) + i = 0 + while i < max_len and seq1[i] == seq2[i]: + i += 1 + + return i def extract_representations(self, representations, modality, task_name): applied_representations = [] From ccfb8c250ba89afab2eb7e509598b7c08b6061c4 Mon Sep 17 00:00:00 2001 From: Christina Dionysio Date: Mon, 18 Aug 2025 12:47:42 +0200 Subject: [PATCH 16/19] add multimodal fusion optimizer --- src/main/python/systemds/scuro/__init__.py | 4 + .../systemds/scuro/dataloader/audio_loader.py | 24 +++--- .../systemds/scuro/dataloader/video_loader.py | 22 ++++-- .../scuro/drsearch/multimodal_optimizer.py | 53 +++++++------ .../scuro/drsearch/unimodal_optimizer.py | 2 +- .../scuro/representations/aggregate.py | 2 +- .../systemds/scuro/representations/average.py | 19 ++--- .../systemds/scuro/representations/bert.py | 3 +- .../systemds/scuro/representations/bow.py | 3 +- .../scuro/representations/concatenation.py | 22 +----- .../systemds/scuro/representations/fusion.py | 17 ++++ .../systemds/scuro/representations/glove.py | 1 + .../scuro/representations/hadamard.py | 3 +- .../systemds/scuro/representations/lstm.py | 2 +- .../systemds/scuro/representations/max.py | 4 +- .../scuro/representations/spectrogram.py | 2 +- .../systemds/scuro/representations/sum.py | 11 +-- .../systemds/scuro/representations/tfidf.py | 1 + .../scuro/representations/word2vec.py | 3 +- src/main/python/tests/scuro/data_generator.py | 12 +++ src/main/python/tests/scuro/test_dr_search.py | 4 +- .../python/tests/scuro/test_fusion_orders.py | 2 +- .../scuro/test_fusion_representations.py | 78 +++++++++++++++++++ .../tests/scuro/test_multimodal_fusion.py | 68 +++++++++------- .../tests/scuro/test_unimodal_optimizer.py | 13 ++-- 25 files changed, 248 insertions(+), 127 deletions(-) create mode 100644 src/main/python/tests/scuro/test_fusion_representations.py diff --git a/src/main/python/systemds/scuro/__init__.py b/src/main/python/systemds/scuro/__init__.py index 1c3cfe92231..ae9aed44c0a 100644 --- 
a/src/main/python/systemds/scuro/__init__.py +++ b/src/main/python/systemds/scuro/__init__.py @@ -73,6 +73,8 @@ from systemds.scuro.drsearch.unimodal_representation_optimizer import ( UnimodalRepresentationOptimizer, ) +from systemds.scuro.drsearch.multimodal_optimizer import MultimodalOptimizer +from systemds.scuro.drsearch.unimodal_optimizer import UnimodalOptimizer __all__ = [ @@ -127,4 +129,6 @@ "OptimizationData", "RepresentationCache", "UnimodalRepresentationOptimizer", + "UnimodalOptimizer", + "MultimodalOptimizer", ] diff --git a/src/main/python/systemds/scuro/dataloader/audio_loader.py b/src/main/python/systemds/scuro/dataloader/audio_loader.py index a1dad304e53..1197617673f 100644 --- a/src/main/python/systemds/scuro/dataloader/audio_loader.py +++ b/src/main/python/systemds/scuro/dataloader/audio_loader.py @@ -45,18 +45,18 @@ def __init__( def extract(self, file: str, index: Optional[Union[str, List[str]]] = None): self.file_sanity_check(file) - # if not self.load_data_from_file: - # import numpy as np - # - # self.metadata[file] = self.modality_type.create_audio_metadata( - # 1000, np.array([0]) - # ) - # else: - audio, sr = librosa.load(file, dtype=self._data_type) + if not self.load_data_from_file: + import numpy as np - if self.normalize: - audio = librosa.util.normalize(audio) + self.metadata[file] = self.modality_type.create_audio_metadata( + 1000, np.array([0]) + ) + else: + audio, sr = librosa.load(file, dtype=self._data_type) - self.metadata[file] = self.modality_type.create_audio_metadata(sr, audio) + if self.normalize: + audio = librosa.util.normalize(audio) - self.data.append(audio) + self.metadata[file] = self.modality_type.create_audio_metadata(sr, audio) + + self.data.append(audio) diff --git a/src/main/python/systemds/scuro/dataloader/video_loader.py b/src/main/python/systemds/scuro/dataloader/video_loader.py index 96ea5f11f69..0e77d5dc57b 100644 --- a/src/main/python/systemds/scuro/dataloader/video_loader.py +++ 
b/src/main/python/systemds/scuro/dataloader/video_loader.py @@ -35,11 +35,13 @@ def __init__( data_type: Union[np.dtype, str] = np.float16, chunk_size: Optional[int] = None, load=True, + fps=None, ): super().__init__( source_path, indices, data_type, chunk_size, ModalityType.VIDEO ) self.load_data_from_file = load + self.fps = fps def extract(self, file: str, index: Optional[Union[str, List[str]]] = None): self.file_sanity_check(file) @@ -53,25 +55,33 @@ def extract(self, file: str, index: Optional[Union[str, List[str]]] = None): if not cap.isOpened(): raise f"Could not read video at path: {file}" - fps = cap.get(cv2.CAP_PROP_FPS) + orig_fps = cap.get(cv2.CAP_PROP_FPS) + frame_interval = 1 + if self.fps is not None and self.fps < orig_fps: + frame_interval = int(round(orig_fps / self.fps)) + else: + self.fps = orig_fps + length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) num_channels = 3 self.metadata[file] = self.modality_type.create_video_metadata( - fps, length, width, height, num_channels + self.fps, length, width, height, num_channels ) frames = [] + idx = 0 while cap.isOpened(): ret, frame = cap.read() if not ret: break - frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) - frame = frame.astype(self._data_type) / 255.0 - - frames.append(frame) + if idx % frame_interval == 0: + frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) + frame = frame.astype(self._data_type) / 255.0 + frames.append(frame) + idx += 1 self.data.append(np.stack(frames)) diff --git a/src/main/python/systemds/scuro/drsearch/multimodal_optimizer.py b/src/main/python/systemds/scuro/drsearch/multimodal_optimizer.py index 0a4dd0c3489..fdda2b6cd20 100644 --- a/src/main/python/systemds/scuro/drsearch/multimodal_optimizer.py +++ b/src/main/python/systemds/scuro/drsearch/multimodal_optimizer.py @@ -102,10 +102,12 @@ def generate_extensions(current_combo, remaining_indices): reuse_fused_representations = False for 
i, modality_combo in enumerate(modality_combos): # clear reuse cache - + if i % 5 == 0: + reuse_cache = self.prune_cache(modality_combos[i:], reuse_cache) + if i != 0: reuse_fused_representations = self.is_prefix_match( - modality_combos[i-1], modality_combo + modality_combos[i - 1], modality_combo ) if reuse_fused_representations: mods = [ @@ -114,9 +116,13 @@ def generate_extensions(current_combo, remaining_indices): ] fused_representations = reuse_cache[modality_combos[i - 1]] else: - prefix_idx = self.compute_equal_prefix_index(modality_combos[i-1], modality_combo) + prefix_idx = self.compute_equal_prefix_index( + modality_combos[i - 1], modality_combo + ) if prefix_idx > 1: - fused_representations = reuse_cache[modality_combos[i - 1][:prefix_idx]] + fused_representations = reuse_cache[ + modality_combos[i - 1][:prefix_idx] + ] reuse_fused_representations = True mods = [ self.k_best_cache[task.model.name][mod_idx] @@ -126,8 +132,7 @@ def generate_extensions(current_combo, remaining_indices): print( f"New modality combo: {modality_combo} - Reuse: {reuse_fused_representations} - # fused reps: {len(fused_representations)}" ) - - + all_mods = [ self.k_best_cache[task.model.name][mod_idx] for mod_idx in modality_combo @@ -163,37 +168,37 @@ def generate_extensions(current_combo, remaining_indices): fusion_method, modality_combo, ) - - if len(modality_combo) < len(self.k_best_cache[task.model.name]) and i +1 < len(modality_combos) and self.is_prefix_match(modality_combos[i], modality_combos[i+1]): + + if ( + len(modality_combo) < len(self.k_best_cache[task.model.name]) + and i + 1 < len(modality_combos) + and self.is_prefix_match(modality_combos[i], modality_combos[i + 1]) + ): reuse_cache[modality_combo] = temp_fused_reps reuse_fused_representations = False + def prune_cache(self, sequences, cache): + seqs_as_tuples = [tuple(seq) for seq in sequences] + + def still_used(key): + return any(self.is_prefix_match(key, seq) for seq in seqs_as_tuples) + + cache = {key: 
value for key, value in cache.items() if still_used(key)} + return cache + def is_prefix_match(self, seq1, seq2): - """ - Check if seq1 is a prefix of seq2. - - Args: - seq1: First sequence (list) - seq2: Second sequence (list) - - Returns: - Boolean indicating whether seq1 is a prefix of seq2 - """ - # seq1 can only be a prefix if it's not longer than seq2 - if len(seq1) > len(seq2): return False # Check if seq1 matches the beginning of seq2 return seq2[: len(seq1)] == seq1 - - + def compute_equal_prefix_index(self, seq1, seq2): max_len = min(len(seq1), len(seq2)) i = 0 while i < max_len and seq1[i] == seq2[i]: i += 1 - + return i def extract_representations(self, representations, modality, task_name): @@ -339,7 +344,7 @@ def print_results(self): print( f" Representation: {entry.modality_combo[i]} - {rep.representations}" ) - # if i < len(reps) - 1: + print(f" Fusion: {entry.fusion_methods[0]} ") def store_results(self, file_name=None): diff --git a/src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py b/src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py index 1e114bb34ee..030f04aa431 100644 --- a/src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py +++ b/src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py @@ -29,7 +29,7 @@ import numpy as np from systemds.scuro.representations.window_aggregation import WindowAggregation -from build.lib.systemds.scuro.representations.aggregated_representation import ( +from systemds.scuro.representations.aggregated_representation import ( AggregatedRepresentation, ) from systemds.scuro import ModalityType, Aggregation diff --git a/src/main/python/systemds/scuro/representations/aggregate.py b/src/main/python/systemds/scuro/representations/aggregate.py index 1e73c81696d..2c046dc4016 100644 --- a/src/main/python/systemds/scuro/representations/aggregate.py +++ b/src/main/python/systemds/scuro/representations/aggregate.py @@ -92,7 +92,7 @@ def execute(self, modality): 
padded_data.append(utils.pad_sequences(entry, max_len)) data[i] = padded_data - return data + return np.array(data) def transform(self, modality): return self.execute(modality) diff --git a/src/main/python/systemds/scuro/representations/average.py b/src/main/python/systemds/scuro/representations/average.py index 8a7e6b9ec8e..ac51f5d1e8d 100644 --- a/src/main/python/systemds/scuro/representations/average.py +++ b/src/main/python/systemds/scuro/representations/average.py @@ -18,7 +18,7 @@ # under the License. # # ------------------------------------------------------------- - +import copy from typing import List import numpy as np @@ -37,23 +37,14 @@ def __init__(self): Combines modalities using averaging """ super().__init__("Average") + self.needs_alignment = True self.associative = True self.commutative = True - def transform(self, modalities: List[Modality]): - for modality in modalities: - modality.flatten() - - max_emb_size = self.get_max_embedding_size(modalities) - - padded_modalities = [] - for modality in modalities: - d = pad_sequences(modality.data, maxlen=max_emb_size, dtype="float32") - padded_modalities.append(d) - - data = padded_modalities[0] + def execute(self, modalities: List[Modality]): + data = copy.deepcopy(modalities[0].data) for i in range(1, len(modalities)): - data += padded_modalities[i] + data += modalities[i].data data /= len(modalities) diff --git a/src/main/python/systemds/scuro/representations/bert.py b/src/main/python/systemds/scuro/representations/bert.py index e6611f0b7c9..3478b84e672 100644 --- a/src/main/python/systemds/scuro/representations/bert.py +++ b/src/main/python/systemds/scuro/representations/bert.py @@ -18,7 +18,7 @@ # under the License. 
# # ------------------------------------------------------------- - +import numpy as np from systemds.scuro.modality.transformed import TransformedModality from systemds.scuro.representations.unimodal import UnimodalRepresentation import torch @@ -56,6 +56,7 @@ def transform(self, modality): if self.output_file is not None: save_embeddings(embeddings, self.output_file) + transformed_modality.data_type = np.float32 transformed_modality.data = embeddings return transformed_modality diff --git a/src/main/python/systemds/scuro/representations/bow.py b/src/main/python/systemds/scuro/representations/bow.py index 6778811c49c..7cfddbb506f 100644 --- a/src/main/python/systemds/scuro/representations/bow.py +++ b/src/main/python/systemds/scuro/representations/bow.py @@ -18,7 +18,7 @@ # under the License. # # ------------------------------------------------------------- - +import numpy as np from sklearn.feature_extraction.text import CountVectorizer from systemds.scuro.modality.transformed import TransformedModality @@ -50,5 +50,6 @@ def transform(self, modality): if self.output_file is not None: save_embeddings(X, self.output_file) + transformed_modality.data_type = np.float32 transformed_modality.data = X return transformed_modality diff --git a/src/main/python/systemds/scuro/representations/concatenation.py b/src/main/python/systemds/scuro/representations/concatenation.py index c7ce33ab5c7..a4d4d53c43e 100644 --- a/src/main/python/systemds/scuro/representations/concatenation.py +++ b/src/main/python/systemds/scuro/representations/concatenation.py @@ -20,7 +20,7 @@ # ------------------------------------------------------------- from typing import List - +import copy import numpy as np from systemds.scuro.modality.modality import Modality @@ -33,14 +33,13 @@ @register_fusion_operator() class Concatenation(Fusion): - def __init__(self, padding=True): + def __init__(self): """ Combines modalities using concatenation """ super().__init__("Concatenation") - self.padding = 
padding - def transform(self, modalities: List[Modality]): + def execute(self, modalities: List[Modality]): if len(modalities) == 1: return np.array(modalities[0].data) @@ -53,19 +52,6 @@ def transform(self, modalities: List[Modality]): data = np.zeros((size, 0)) for modality in modalities: - if self.padding: - data = np.concatenate( - [ - data, - pad_sequences( - modality.data, - maxlen=max_emb_size, - dtype=modality.data.dtype, - ), - ], - axis=-1, - ) - else: - data = np.concatenate([data, modality.data], axis=-1) + data = np.concatenate([data, copy.deepcopy(modality.data)], axis=-1) return np.array(data) diff --git a/src/main/python/systemds/scuro/representations/fusion.py b/src/main/python/systemds/scuro/representations/fusion.py index cbbb5606e6d..4b746eee219 100644 --- a/src/main/python/systemds/scuro/representations/fusion.py +++ b/src/main/python/systemds/scuro/representations/fusion.py @@ -21,9 +21,11 @@ from typing import List import numpy as np +from systemds.scuro import AggregatedRepresentation, Aggregation from systemds.scuro.modality.modality import Modality from systemds.scuro.representations.representation import Representation +from systemds.scuro.utils.schema_helpers import get_shape class Fusion(Representation): @@ -44,6 +46,21 @@ def transform(self, modalities: List[Modality]): :param modalities: List of modalities used in the fusion :return: fused data """ + mods = [] + for modality in modalities: + agg_modality = None + if get_shape(modality.metadata) > 1: + agg_operator = AggregatedRepresentation(Aggregation()) + agg_modality = agg_operator.transform(modality) + mods.append(agg_modality if agg_modality else modality) + + if self.needs_alignment: + max_len = self.get_max_embedding_size(mods) + for modality in mods: + modality.pad(max_len=max_len) + return self.execute(mods) + + def execute(self, modalities: List[Modality]): raise f"Not implemented for Fusion: {self.name}" def get_max_embedding_size(self, modalities: List[Modality]): diff 
--git a/src/main/python/systemds/scuro/representations/glove.py b/src/main/python/systemds/scuro/representations/glove.py index d948567f3f5..9076efecfc9 100644 --- a/src/main/python/systemds/scuro/representations/glove.py +++ b/src/main/python/systemds/scuro/representations/glove.py @@ -67,5 +67,6 @@ def transform(self, modality): if self.output_file is not None: save_embeddings(np.array(embeddings), self.output_file) + transformed_modality.data_type = np.float32 transformed_modality.data = np.array(embeddings) return transformed_modality diff --git a/src/main/python/systemds/scuro/representations/hadamard.py b/src/main/python/systemds/scuro/representations/hadamard.py index 138003b8741..a777768ff6e 100644 --- a/src/main/python/systemds/scuro/representations/hadamard.py +++ b/src/main/python/systemds/scuro/representations/hadamard.py @@ -41,8 +41,7 @@ def __init__(self): self.commutative = True self.associative = True - def transform(self, modalities: List[Modality], train_indices=None): - # TODO: check for alignment in the metadata + def execute(self, modalities: List[Modality], train_indices=None): fused_data = np.prod([m.data for m in modalities], axis=0) return fused_data diff --git a/src/main/python/systemds/scuro/representations/lstm.py b/src/main/python/systemds/scuro/representations/lstm.py index cbab0f68978..0cfafddefa9 100644 --- a/src/main/python/systemds/scuro/representations/lstm.py +++ b/src/main/python/systemds/scuro/representations/lstm.py @@ -64,7 +64,7 @@ def transform(self, modalities: List[Modality]): result = np.zeros((size, 0)) for modality in modalities: - if modality.modality_type in self.unimodal_embeddings.keys(): + if modality.modality_type in list(self.unimodal_embeddings.keys()): out = self.unimodal_embeddings.get(modality.modality_type) else: out = self.run_lstm(modality.data) diff --git a/src/main/python/systemds/scuro/representations/max.py b/src/main/python/systemds/scuro/representations/max.py index 6ecf5fd52f3..39f5069c2b5 100644 
--- a/src/main/python/systemds/scuro/representations/max.py +++ b/src/main/python/systemds/scuro/representations/max.py @@ -40,11 +40,9 @@ def __init__(self): self.associative = True self.commutative = True - def transform( + def execute( self, modalities: List[Modality], ): - # TODO: need to check if data is aligned - same number of dimension fused_data = np.maximum.reduce([m.data for m in modalities]) - return fused_data diff --git a/src/main/python/systemds/scuro/representations/spectrogram.py b/src/main/python/systemds/scuro/representations/spectrogram.py index 5fb1780536d..8daa9abb015 100644 --- a/src/main/python/systemds/scuro/representations/spectrogram.py +++ b/src/main/python/systemds/scuro/representations/spectrogram.py @@ -48,7 +48,7 @@ def transform(self, modality): ).astype(modality.data_type) S_dB = librosa.amplitude_to_db(np.abs(spectrogram)) - result.append(S_dB.T) + result.append(S_dB.T.reshape(-1)) transformed_modality.data = result return transformed_modality diff --git a/src/main/python/systemds/scuro/representations/sum.py b/src/main/python/systemds/scuro/representations/sum.py index 46d93f2eda0..5b3710b6e14 100644 --- a/src/main/python/systemds/scuro/representations/sum.py +++ b/src/main/python/systemds/scuro/representations/sum.py @@ -37,15 +37,12 @@ def __init__(self): Combines modalities using colum-wise sum """ super().__init__("Sum") + self.needs_alignment = True - def transform(self, modalities: List[Modality]): - max_emb_size = self.get_max_embedding_size(modalities) - - data = pad_sequences(modalities[0].data, maxlen=max_emb_size, dtype="float32") + def execute(self, modalities: List[Modality]): + data = modalities[0].data for m in range(1, len(modalities)): - data += pad_sequences( - modalities[m].data, maxlen=max_emb_size, dtype="float32" - ) + data += modalities[m].data return data diff --git a/src/main/python/systemds/scuro/representations/tfidf.py b/src/main/python/systemds/scuro/representations/tfidf.py index 
c26039f46a9..3b8f069df83 100644 --- a/src/main/python/systemds/scuro/representations/tfidf.py +++ b/src/main/python/systemds/scuro/representations/tfidf.py @@ -47,5 +47,6 @@ def transform(self, modality): if self.output_file is not None: save_embeddings(X, self.output_file) + transformed_modality.data_type = np.float32 transformed_modality.data = X return transformed_modality diff --git a/src/main/python/systemds/scuro/representations/word2vec.py b/src/main/python/systemds/scuro/representations/word2vec.py index 8543379bc14..88d60ac828b 100644 --- a/src/main/python/systemds/scuro/representations/word2vec.py +++ b/src/main/python/systemds/scuro/representations/word2vec.py @@ -69,5 +69,6 @@ def transform(self, modality): if self.output_file is not None: save_embeddings(np.array(embeddings), self.output_file) - transformed_modality.data = embeddings + transformed_modality.data_type = np.float32 + transformed_modality.data = np.array(embeddings) return transformed_modality diff --git a/src/main/python/tests/scuro/data_generator.py b/src/main/python/tests/scuro/data_generator.py index e2dceec329d..e57716fa99d 100644 --- a/src/main/python/tests/scuro/data_generator.py +++ b/src/main/python/tests/scuro/data_generator.py @@ -193,6 +193,18 @@ def create_visual_modality( return (data, metadata) + def create_balanced_labels(self, num_instances, num_classes=2): + if num_instances % num_classes != 0: + raise ValueError("Size must be even to have equal numbers of classes.") + + class_size = int(num_instances / num_classes) + vector = np.array([0] * class_size) + for i in range(num_classes - 1): + vector = np.concatenate((vector, np.array([1] * class_size))) + + np.random.shuffle(vector) + return vector + def setup_data(modalities, num_instances, path): if os.path.isdir(path): diff --git a/src/main/python/tests/scuro/test_dr_search.py b/src/main/python/tests/scuro/test_dr_search.py index 50f57eebb20..3e0e702e6f3 100644 --- a/src/main/python/tests/scuro/test_dr_search.py +++ 
b/src/main/python/tests/scuro/test_dr_search.py @@ -94,7 +94,9 @@ def setUpClass(cls): cls.num_instances = 20 cls.data_generator = ModalityRandomDataGenerator() - cls.labels = np.random.choice([0, 1], size=cls.num_instances) + cls.labels = ModalityRandomDataGenerator().create_balanced_labels( + num_instances=cls.num_instances + ) # TODO: adapt the representation so they return non aggregated values. Apply windowing operation instead cls.video = cls.data_generator.create1DModality( diff --git a/src/main/python/tests/scuro/test_fusion_orders.py b/src/main/python/tests/scuro/test_fusion_orders.py index eb01d18ffe4..22d64bcc0bf 100644 --- a/src/main/python/tests/scuro/test_fusion_orders.py +++ b/src/main/python/tests/scuro/test_fusion_orders.py @@ -65,7 +65,7 @@ def test_fusion_order_concat(self): self.assertFalse(np.array_equal(r_1_r_2.data, r_2_r_1.data)) self.assertFalse(np.array_equal(r_1_r_2_r_3.data, r_2_r_1_r_3.data)) - self.assertFalse(np.array_equal(r_1_r_2_r_3.data, r1_r2_r3.data)) + self.assertFalse(np.array_equal(r_2_r_1.data, r1_r2_r3.data)) self.assertFalse(np.array_equal(r_1_r_2.data, r1_r2_r3.data)) def test_fusion_order_max(self): diff --git a/src/main/python/tests/scuro/test_fusion_representations.py b/src/main/python/tests/scuro/test_fusion_representations.py new file mode 100644 index 00000000000..6aaeb2a4fbe --- /dev/null +++ b/src/main/python/tests/scuro/test_fusion_representations.py @@ -0,0 +1,78 @@ +# ------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# ------------------------------------------------------------- +import unittest + +import numpy as np +from systemds.scuro.representations.window_aggregation import WindowAggregation +from systemds.scuro.representations.aggregate import Aggregation +from systemds.scuro.representations.wav2vec import Wav2Vec +from systemds.scuro.representations.tfidf import TfIdf +from systemds.scuro.representations.spectrogram import Spectrogram +from systemds.scuro.representations.bow import BoW +from systemds.scuro.representations.mel_spectrogram import MelSpectrogram +from systemds.scuro.representations.bert import Bert +from systemds.scuro.representations.mfcc import MFCC +from systemds.scuro.representations.multimodal_attention_fusion import ( + MultiModalAttentionFusion, + AttentionFusion, +) +from systemds.scuro.representations.resnet import ResNet +from systemds.scuro.modality.unimodal_modality import UnimodalModality +from systemds.scuro.modality.type import ModalityType +from tests.scuro.data_generator import ModalityRandomDataGenerator, TestDataLoader + + +class TestFusionOrders(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.num_instances = 40 + cls.indices = np.array(range(cls.num_instances)) + audio_data, audio_md = ModalityRandomDataGenerator().create_audio_data( + cls.num_instances, 100 + ) + text_data, text_md = ModalityRandomDataGenerator().create_text_data( + cls.num_instances + ) + video_data, video_md = ModalityRandomDataGenerator().create_visual_modality( + cls.num_instances, 60 + ) + cls.audio = UnimodalModality( + TestDataLoader( 
+ cls.indices, None, ModalityType.AUDIO, audio_data, np.float32, audio_md + ) + ) + cls.video = UnimodalModality( + TestDataLoader( + cls.indices, 10, ModalityType.VIDEO, video_data, np.float32, video_md + ) + ) + cls.text = UnimodalModality( + TestDataLoader( + cls.indices, None, ModalityType.TEXT, text_data, str, text_md + ) + ) + + def test_attention(self): + r_a = self.audio.apply_representation(MelSpectrogram()) + r_t = self.text.apply_representation(TfIdf()) + r_v = self.video.apply_representation(ResNet()) + + fused = AttentionFusion().transform([r_a, r_v, r_t]) diff --git a/src/main/python/tests/scuro/test_multimodal_fusion.py b/src/main/python/tests/scuro/test_multimodal_fusion.py index 77f03054eb5..ae3ddedffb1 100644 --- a/src/main/python/tests/scuro/test_multimodal_fusion.py +++ b/src/main/python/tests/scuro/test_multimodal_fusion.py @@ -22,12 +22,15 @@ import shutil import unittest +from multiprocessing import freeze_support import numpy as np from sklearn import svm from sklearn.metrics import classification_report from sklearn.model_selection import train_test_split +from systemds.scuro.drsearch.multimodal_optimizer import MultimodalOptimizer +from systemds.scuro.drsearch.unimodal_optimizer import UnimodalOptimizer from systemds.scuro.representations.concatenation import Concatenation from systemds.scuro.representations.average import Average from systemds.scuro.drsearch.fusion_optimizer import FusionOptimizer @@ -115,7 +118,9 @@ class TestMultimodalRepresentationOptimizer(unittest.TestCase): def setUpClass(cls): cls.num_instances = 10 cls.mods = [ModalityType.VIDEO, ModalityType.AUDIO, ModalityType.TEXT] - cls.labels = np.random.choice([0, 1], size=cls.num_instances) + cls.labels = ModalityRandomDataGenerator().create_balanced_labels( + num_instances=cls.num_instances + ) cls.indices = np.array(range(cls.num_instances)) split = train_test_split( @@ -123,31 +128,15 @@ def setUpClass(cls): cls.labels, test_size=0.2, random_state=42, + 
stratify=cls.labels, ) cls.train_indizes, cls.val_indizes = [int(i) for i in split[0]], [ int(i) for i in split[1] ] - cls.tasks = [ - Task( - "UnimodalRepresentationTask1", - TestSVM(), - cls.labels, - cls.train_indizes, - cls.val_indizes, - ), - Task( - "UnimodalRepresentationTask2", - TestCNN(), - cls.labels, - cls.train_indizes, - cls.val_indizes, - ), - ] - def test_multimodal_fusion(self): task = Task( - "UnimodalRepresentationTask1", + "MM_Fusion_Task1", TestSVM(), self.labels, self.train_indizes, @@ -192,22 +181,47 @@ def test_multimodal_fusion(self): ): registry = Registry() registry._fusion_operators = [Average, Concatenation] - unimodal_optimizer = UnimodalRepresentationOptimizer( - [text, audio, video], [task], max_chain_depth=2 + unimodal_optimizer = UnimodalOptimizer( + [audio, text, video], [task], debug=False ) unimodal_optimizer.optimize() + unimodal_optimizer.operator_performance.get_k_best_results(audio, 2, task) - multimodal_optimizer = FusionOptimizer( + multimodal_optimizer = MultimodalOptimizer( [audio, text, video], - task, - unimodal_optimizer.optimization_results, - unimodal_optimizer.cache, - 2, - 2, + unimodal_optimizer.operator_performance, + [task], debug=False, ) + multimodal_optimizer.optimize() + assert ( + len(multimodal_optimizer.optimization_results.results["TestSVM"].keys()) + == 57 + ) + assert ( + len( + multimodal_optimizer.optimization_results.results["TestSVM"][ + "0_1_2_3_4_5" + ] + ) + == 62 + ) + assert ( + len( + multimodal_optimizer.optimization_results.results["TestSVM"][ + "3_4_5" + ] + ) + == 6 + ) + assert ( + len(multimodal_optimizer.optimization_results.results["TestSVM"]["0_1"]) + == 2 + ) + if __name__ == "__main__": + freeze_support() unittest.main() diff --git a/src/main/python/tests/scuro/test_unimodal_optimizer.py b/src/main/python/tests/scuro/test_unimodal_optimizer.py index 192567e92ee..a73d7b5fcc1 100644 --- a/src/main/python/tests/scuro/test_unimodal_optimizer.py +++ 
b/src/main/python/tests/scuro/test_unimodal_optimizer.py @@ -104,7 +104,9 @@ class TestUnimodalRepresentationOptimizer(unittest.TestCase): def setUpClass(cls): cls.num_instances = 10 cls.mods = [ModalityType.VIDEO, ModalityType.AUDIO, ModalityType.TEXT] - cls.labels = np.random.choice([0, 1], size=cls.num_instances) + cls.labels = ModalityRandomDataGenerator().create_balanced_labels( + num_instances=cls.num_instances + ) cls.indices = np.array(range(cls.num_instances)) split = train_test_split( @@ -182,7 +184,7 @@ def optimize_unimodal_representation_for_modality(self, modality): ): registry = Registry() - unimodal_optimizer = UnimodalOptimizer([modality], self.tasks) + unimodal_optimizer = UnimodalOptimizer([modality], self.tasks, False) unimodal_optimizer.optimize() assert ( @@ -190,10 +192,11 @@ def optimize_unimodal_representation_for_modality(self, modality): == modality.modality_id ) assert len(unimodal_optimizer.operator_performance.task_names) == 2 - assert ( - len(unimodal_optimizer.get_k_best_results(modality, 1, self.tasks[0])) - == 1 + result, cached = unimodal_optimizer.operator_performance.get_k_best_results( + modality, 1, self.tasks[0] ) + assert len(result) == 1 + assert len(cached) == 1 if __name__ == "__main__": From e1711b433f279574b6690d3e152d1e5e5762d8e5 Mon Sep 17 00:00:00 2001 From: Christina Dionysio Date: Mon, 18 Aug 2025 12:55:25 +0200 Subject: [PATCH 17/19] remove test --- .../scuro/test_fusion_representations.py | 78 ------------------- 1 file changed, 78 deletions(-) delete mode 100644 src/main/python/tests/scuro/test_fusion_representations.py diff --git a/src/main/python/tests/scuro/test_fusion_representations.py b/src/main/python/tests/scuro/test_fusion_representations.py deleted file mode 100644 index 6aaeb2a4fbe..00000000000 --- a/src/main/python/tests/scuro/test_fusion_representations.py +++ /dev/null @@ -1,78 +0,0 @@ -# ------------------------------------------------------------- -# -# Licensed to the Apache Software Foundation 
(ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# -# ------------------------------------------------------------- -import unittest - -import numpy as np -from systemds.scuro.representations.window_aggregation import WindowAggregation -from systemds.scuro.representations.aggregate import Aggregation -from systemds.scuro.representations.wav2vec import Wav2Vec -from systemds.scuro.representations.tfidf import TfIdf -from systemds.scuro.representations.spectrogram import Spectrogram -from systemds.scuro.representations.bow import BoW -from systemds.scuro.representations.mel_spectrogram import MelSpectrogram -from systemds.scuro.representations.bert import Bert -from systemds.scuro.representations.mfcc import MFCC -from systemds.scuro.representations.multimodal_attention_fusion import ( - MultiModalAttentionFusion, - AttentionFusion, -) -from systemds.scuro.representations.resnet import ResNet -from systemds.scuro.modality.unimodal_modality import UnimodalModality -from systemds.scuro.modality.type import ModalityType -from tests.scuro.data_generator import ModalityRandomDataGenerator, TestDataLoader - - -class TestFusionOrders(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.num_instances = 40 - cls.indices = np.array(range(cls.num_instances)) - audio_data, 
audio_md = ModalityRandomDataGenerator().create_audio_data( - cls.num_instances, 100 - ) - text_data, text_md = ModalityRandomDataGenerator().create_text_data( - cls.num_instances - ) - video_data, video_md = ModalityRandomDataGenerator().create_visual_modality( - cls.num_instances, 60 - ) - cls.audio = UnimodalModality( - TestDataLoader( - cls.indices, None, ModalityType.AUDIO, audio_data, np.float32, audio_md - ) - ) - cls.video = UnimodalModality( - TestDataLoader( - cls.indices, 10, ModalityType.VIDEO, video_data, np.float32, video_md - ) - ) - cls.text = UnimodalModality( - TestDataLoader( - cls.indices, None, ModalityType.TEXT, text_data, str, text_md - ) - ) - - def test_attention(self): - r_a = self.audio.apply_representation(MelSpectrogram()) - r_t = self.text.apply_representation(TfIdf()) - r_v = self.video.apply_representation(ResNet()) - - fused = AttentionFusion().transform([r_a, r_v, r_t]) From 02ceb15cf2782ed8db1bca92d9e2ec9f3b4e37ab Mon Sep 17 00:00:00 2001 From: Christina Dionysio Date: Mon, 18 Aug 2025 12:59:50 +0200 Subject: [PATCH 18/19] add missing header --- .../systemds/scuro/utils/static_variables.py | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/src/main/python/systemds/scuro/utils/static_variables.py b/src/main/python/systemds/scuro/utils/static_variables.py index b1b3e657a24..8237cdf1b3e 100644 --- a/src/main/python/systemds/scuro/utils/static_variables.py +++ b/src/main/python/systemds/scuro/utils/static_variables.py @@ -1,3 +1,23 @@ +# ------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# ------------------------------------------------------------- import numpy as np import torch From a6f21e0aff51b5e0730882d764a02db0c51986e7 Mon Sep 17 00:00:00 2001 From: Christina Dionysio Date: Mon, 18 Aug 2025 13:22:00 +0200 Subject: [PATCH 19/19] add missing header --- .../scuro/drsearch/multimodal_optimizer.py | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/src/main/python/systemds/scuro/drsearch/multimodal_optimizer.py b/src/main/python/systemds/scuro/drsearch/multimodal_optimizer.py index fdda2b6cd20..2da8e7ae195 100644 --- a/src/main/python/systemds/scuro/drsearch/multimodal_optimizer.py +++ b/src/main/python/systemds/scuro/drsearch/multimodal_optimizer.py @@ -1,3 +1,23 @@ +# ------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# +# ------------------------------------------------------------- import itertools from systemds.scuro.representations.aggregated_representation import (