apache · christinadionysio · Jul 28, 2025 · Jun 4, 2025 · Jun 4, 2025 · Jun 4, 2025
diff --git a/src/main/python/systemds/scuro/__init__.py b/src/main/python/systemds/scuro/__init__.py
@@ -39,7 +39,7 @@
 from systemds.scuro.representations.max import RowMax
 from systemds.scuro.representations.mel_spectrogram import MelSpectrogram
 from systemds.scuro.representations.mfcc import MFCC
-from systemds.scuro.representations.multiplication import Multiplication
+from systemds.scuro.representations.hadamard import Hadamard
 from systemds.scuro.representations.optical_flow import OpticalFlow
 from systemds.scuro.representations.representation import Representation
 from systemds.scuro.representations.representation_dataloader import NPY
@@ -52,7 +52,7 @@
 from systemds.scuro.representations.tfidf import TfIdf
 from systemds.scuro.representations.unimodal import UnimodalRepresentation
 from systemds.scuro.representations.wav2vec import Wav2Vec
-from systemds.scuro.representations.window import WindowAggregation
+from systemds.scuro.representations.window_aggregation import WindowAggregation
 from systemds.scuro.representations.word2vec import W2V
 from systemds.scuro.representations.x3d import X3D
 from systemds.scuro.models.model import Model
@@ -94,7 +94,7 @@
     "RowMax",
     "MelSpectrogram",
     "MFCC",
-    "Multiplication",
+    "Hadamard",
     "OpticalFlow",
     "Representation",
     "NPY",

diff --git a/src/main/python/systemds/scuro/dataloader/audio_loader.py b/src/main/python/systemds/scuro/dataloader/audio_loader.py
@@ -21,6 +21,8 @@
 from typing import List, Optional, Union
 
 import librosa
+import numpy as np
+
 from systemds.scuro.dataloader.base_loader import BaseLoader
 from systemds.scuro.modality.type import ModalityType
 
@@ -30,15 +32,27 @@ def __init__(
         self,
         source_path: str,
         indices: List[str],
+        data_type: Union[np.dtype, str] = np.float32,
         chunk_size: Optional[int] = None,
         normalize: bool = True,
+        load=True,
     ):
-        super().__init__(source_path, indices, chunk_size, ModalityType.AUDIO)
+        super().__init__(
+            source_path, indices, data_type, chunk_size, ModalityType.AUDIO
+        )
         self.normalize = normalize
+        self.load_data_from_file = load
 
     def extract(self, file: str, index: Optional[Union[str, List[str]]] = None):
         self.file_sanity_check(file)
-        audio, sr = librosa.load(file)
+        # if not self.load_data_from_file:
+        #     import numpy as np
+        #
+        #     self.metadata[file] = self.modality_type.create_audio_metadata(
+        #         1000, np.array([0])
+        #     )
+        # else:
+        audio, sr = librosa.load(file, dtype=self._data_type)
 
         if self.normalize:
             audio = librosa.util.normalize(audio)

diff --git a/src/main/python/systemds/scuro/dataloader/base_loader.py b/src/main/python/systemds/scuro/dataloader/base_loader.py
@@ -21,13 +21,17 @@
 import os
 from abc import ABC, abstractmethod
 from typing import List, Optional, Union
+import math
+
+import numpy as np
 
 
 class BaseLoader(ABC):
     def __init__(
         self,
         source_path: str,
         indices: List[str],
+        data_type: Union[np.dtype, str],
         chunk_size: Optional[int] = None,
         modality_type=None,
     ):
@@ -48,6 +52,7 @@ def __init__(
         self._next_chunk = 0
         self._num_chunks = 1
         self._chunk_size = None
+        self._data_type = data_type
 
         if chunk_size:
             self.chunk_size = chunk_size
@@ -59,7 +64,7 @@ def chunk_size(self):
     @chunk_size.setter
     def chunk_size(self, value):
         self._chunk_size = value
-        self._num_chunks = int(len(self.indices) / self._chunk_size)
+        self._num_chunks = int(math.ceil(len(self.indices) / self._chunk_size))
 
     @property
     def num_chunks(self):
@@ -69,6 +74,14 @@ def num_chunks(self):
     def next_chunk(self):
         return self._next_chunk
 
+    @property
+    def data_type(self):
+        return self._data_type
+
+    @data_type.setter
+    def data_type(self, data_type):
+        self._data_type = self.resolve_data_type(data_type)
+
     def reset(self):
         self._next_chunk = 0
         self.data = []
@@ -110,16 +123,25 @@ def _load_next_chunk(self):
         return self._load(next_chunk_indices)
 
     def _load(self, indices: List[str]):
-        is_dir = True if os.path.isdir(self.source_path) else False
+        file_names = self.get_file_names(indices)
+        if isinstance(file_names, str):
+            self.extract(file_names, indices)
+        else:
+            for file_name in file_names:
+                self.extract(file_name)
+
+        return self.data, self.metadata
 
+    def get_file_names(self, indices=None):
+        is_dir = True if os.path.isdir(self.source_path) else False
+        file_names = []
         if is_dir:
             _, ext = os.path.splitext(os.listdir(self.source_path)[0])
-            for index in indices:
-                self.extract(self.source_path + index + ext)
+            for index in self.indices if indices is None else indices:
+                file_names.append(self.source_path + index + ext)
+            return file_names
         else:
-            self.extract(self.source_path, indices)
-
-        return self.data, self.metadata
+            return self.source_path
 
     @abstractmethod
     def extract(self, file: str, index: Optional[Union[str, List[str]]] = None):
@@ -137,3 +159,40 @@ def file_sanity_check(file):
 
         if file_size == 0:
             raise ("File {0} is empty".format(file))
+
+    @staticmethod
+    def resolve_data_type(data_type):
+        """
+        Validate a requested data type and return its resolved form
+        :param data_type: Name of a supported numeric type (case-insensitive),
+            one of the supported numpy scalar types, or the builtin str
+        :return: np.dtype for a string input, otherwise data_type unchanged
+        :raises ValueError: If data_type is not among the supported types
+        """
+        if isinstance(data_type, str):
+            # Look up the lowercased name: np.dtype() does not accept spellings
+            # such as "Float32" that the case-insensitive check lets through
+            type_name = data_type.lower()
+            if type_name in [
+                "float16",
+                "float32",
+                "float64",
+                "int16",
+                "int32",
+                "int64",
+            ]:
+                return np.dtype(type_name)
+            else:
+                raise ValueError(f"Unsupported data_type string: {data_type}")
+        elif data_type in [
+            np.float16,
+            np.float32,
+            np.float64,
+            np.int16,
+            np.int32,
+            np.int64,
+            str,
+        ]:
+            return data_type
+        else:
+            raise ValueError(f"Unsupported data_type: {data_type}")
diff --git a/src/main/python/systemds/scuro/dataloader/json_loader.py b/src/main/python/systemds/scuro/dataloader/json_loader.py
@@ -20,6 +20,8 @@
 # -------------------------------------------------------------
 import json
 
+import numpy as np
+
 from systemds.scuro.modality.type import ModalityType
 from systemds.scuro.dataloader.base_loader import BaseLoader
 from typing import Optional, List, Union
@@ -31,9 +33,10 @@ def __init__(
         source_path: str,
         indices: List[str],
         field: str,
+        data_type: Union[np.dtype, str] = str,
         chunk_size: Optional[int] = None,
     ):
-        super().__init__(source_path, indices, chunk_size, ModalityType.TEXT)
+        super().__init__(source_path, indices, data_type, chunk_size, ModalityType.TEXT)
         self.field = field
 
     def extract(self, file: str, index: Optional[Union[str, List[str]]] = None):

diff --git a/src/main/python/systemds/scuro/dataloader/text_loader.py b/src/main/python/systemds/scuro/dataloader/text_loader.py
@@ -29,10 +29,11 @@ def __init__(
         self,
         source_path: str,
         indices: List[str],
+        data_type: Union[type, str] = str,
         chunk_size: Optional[int] = None,
         prefix: Optional[Pattern[str]] = None,
     ):
-        super().__init__(source_path, indices, chunk_size, ModalityType.TEXT)
+        super().__init__(source_path, indices, data_type, chunk_size, ModalityType.TEXT)
         self.prefix = prefix
 
     def extract(self, file: str, index: Optional[Union[str, List[str]]] = None):

diff --git a/src/main/python/systemds/scuro/dataloader/video_loader.py b/src/main/python/systemds/scuro/dataloader/video_loader.py
@@ -32,12 +32,22 @@ def __init__(
         self,
         source_path: str,
         indices: List[str],
+        data_type: Union[np.dtype, str] = np.float16,
         chunk_size: Optional[int] = None,
+        load=True,
     ):
-        super().__init__(source_path, indices, chunk_size, ModalityType.VIDEO)
+        super().__init__(
+            source_path, indices, data_type, chunk_size, ModalityType.VIDEO
+        )
+        self.load_data_from_file = load
 
     def extract(self, file: str, index: Optional[Union[str, List[str]]] = None):
         self.file_sanity_check(file)
+        # if not self.load_data_from_file:
+        #     self.metadata[file] = self.modality_type.create_video_metadata(
+        #         30, 10, 100, 100, 3
+        #     )
+        # else:
         cap = cv2.VideoCapture(file)
 
         if not cap.isOpened():
@@ -60,8 +70,8 @@ def extract(self, file: str, index: Optional[Union[str, List[str]]] = None):
             if not ret:
                 break
             frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-            frame = frame.astype(np.float32) / 255.0
+            frame = frame.astype(self._data_type) / 255.0
 
             frames.append(frame)
 
-        self.data.append(frames)
+        self.data.append(np.stack(frames))
diff --git a/src/main/python/systemds/scuro/drsearch/operator_registry.py b/src/main/python/systemds/scuro/drsearch/operator_registry.py
@@ -58,6 +58,7 @@ def get_representations(self, modality: ModalityType):
         return self._representations[modality]
 
     def get_context_operators(self):
+        # TODO: return modality specific context operations
         return self._context_operators
 
     def get_fusion_operators(self):

diff --git a/src/main/python/systemds/scuro/drsearch/representation_cache.py b/src/main/python/systemds/scuro/drsearch/representation_cache.py
@@ -112,7 +112,11 @@ def load_from_cache(self, modality, operators):
                 metadata = pickle.load(f)
 
             transformed_modality = TransformedModality(
-                modality.modality_type, op_names, modality.modality_id, metadata
+                modality,
+                op_names,
             )
+            # TransformedModality now copies modality.metadata; keep the cached
+            # metadata loaded above, as the previous constructor call did
+            transformed_modality.metadata = metadata
             data = None
             with open(f"{filename}.pkl", "rb") as f:

diff --git a/src/main/python/systemds/scuro/modality/joined_transformed.py b/src/main/python/systemds/scuro/modality/joined_transformed.py
@@ -25,7 +25,7 @@
 
 from systemds.scuro.modality.modality import Modality
 from systemds.scuro.representations.utils import pad_sequences
-from systemds.scuro.representations.window import WindowAggregation
+from systemds.scuro.representations.window_aggregation import WindowAggregation
 
 
 class JoinedTransformedModality(Modality):
@@ -70,7 +70,7 @@ def combine(self, fusion_method):
         self.data = pad_sequences(self.data)
         return self
 
-    def window(self, window_size, aggregation):
+    def window_aggregation(self, window_size, aggregation):
         w = WindowAggregation(window_size, aggregation)
         self.left_modality.data = w.execute(self.left_modality)
         self.right_modality.data = w.execute(self.right_modality)

diff --git a/src/main/python/systemds/scuro/modality/modality.py b/src/main/python/systemds/scuro/modality/modality.py
@@ -29,7 +29,9 @@
 
 class Modality:
 
-    def __init__(self, modalityType: ModalityType, modality_id=-1, metadata={}):
+    def __init__(
+        self, modalityType: ModalityType, modality_id=-1, metadata={}, data_type=None
+    ):
         """
         Parent class of the different Modalities (unimodal & multimodal)
         :param modality_type: Type of the modality
@@ -38,7 +40,7 @@ def __init__(self, modalityType: ModalityType, modality_id=-1, metadata={}):
         self.schema = modalityType.get_schema()
         self.metadata = metadata
         self.data = []
-        self.data_type = None
+        self.data_type = data_type
         self.cost = None
         self.shape = None
         self.modality_id = modality_id
@@ -67,7 +69,9 @@ def copy_from_instance(self):
         """
         Create a copy of the modality instance
         """
-        return type(self)(self.modality_type, self.metadata)
+        return type(self)(
+            self.modality_type, self.modality_id, self.metadata, self.data_type
+        )
 
     def update_metadata(self):
         """

diff --git a/src/main/python/systemds/scuro/modality/transformed.py b/src/main/python/systemds/scuro/modality/transformed.py
@@ -24,24 +24,28 @@
 from systemds.scuro.modality.type import ModalityType
 from systemds.scuro.modality.joined import JoinedModality
 from systemds.scuro.modality.modality import Modality
-from systemds.scuro.representations.window import WindowAggregation
+from systemds.scuro.representations.window_aggregation import WindowAggregation
 
 
 class TransformedModality(Modality):
 
-    def __init__(self, modality_type, transformation, modality_id, metadata):
+    def __init__(self, modality, transformation, new_modality_type=None):
         """
         Parent class of the different Modalities (unimodal & multimodal)
-        :param modality_type: Type of the original modality(ies)
+        :param modality: Source modality; new_modality_type overrides its type if set
         :param transformation: Representation to be applied on the modality
         """
-        super().__init__(modality_type, modality_id, metadata)
+        if new_modality_type is None:
+            new_modality_type = modality.modality_type
+
+        metadata = modality.metadata.copy() if modality.metadata is not None else None
+        super().__init__(
+            new_modality_type, modality.modality_id, metadata, modality.data_type
+        )
         self.transformation = transformation
 
     def copy_from_instance(self):
-        return type(self)(
-            self.modality_type, self.transformation, self.modality_id, self.metadata
-        )
+        return type(self)(self, self.transformation)
 
     def join(self, right, join_condition):
         chunked_execution = False
@@ -65,19 +69,15 @@ def join(self, right, join_condition):
 
         return joined_modality
 
-    def window(self, windowSize, aggregation):
-        transformed_modality = TransformedModality(
-            self.modality_type, "window", self.modality_id, self.metadata
-        )
+    def window_aggregation(self, windowSize, aggregation):
         w = WindowAggregation(windowSize, aggregation)
+        transformed_modality = TransformedModality(self, w)
         transformed_modality.data = w.execute(self)
 
         return transformed_modality
 
     def context(self, context_operator):
-        transformed_modality = TransformedModality(
-            self.modality_type, context_operator.name, self.modality_id, self.metadata
-        )
+        transformed_modality = TransformedModality(self, context_operator)
 
         transformed_modality.data = context_operator.execute(self)
         return transformed_modality
@@ -94,10 +94,7 @@ def combine(self, other, fusion_method):
         :param fusion_method: The fusion method to be used to combine modalities
         """
         fused_modality = TransformedModality(
-            ModalityType.EMBEDDING,
-            fusion_method,
-            self.modality_id,
-            self.metadata,
+            self, fusion_method, ModalityType.EMBEDDING
         )
         modalities = [self]
         if isinstance(other, list):

diff --git a/src/main/python/systemds/scuro/modality/type.py b/src/main/python/systemds/scuro/modality/type.py
@@ -191,6 +191,14 @@ def get_schema(self):
     def update_metadata(self, md, data):
         return ModalitySchemas.update_metadata(self.name, md, data)
 
+    def add_alignment(self, md, alignment_timestamps):
+        md["alignment_timestamps"] = alignment_timestamps
+        return md
+
+    def add_field(self, md, field, data):
+        md[field] = data
+        return md
+
     def create_audio_metadata(self, sampling_rate, data):
         md = deepcopy(self.get_schema())
         md = ModalitySchemas.update_base_metadata(md, data, True)