diff --git a/src/main/python/systemds/scuro/__init__.py b/src/main/python/systemds/scuro/__init__.py index e74ae53f364..8b5a8621d1d 100644 --- a/src/main/python/systemds/scuro/__init__.py +++ b/src/main/python/systemds/scuro/__init__.py @@ -20,6 +20,7 @@ # ------------------------------------------------------------- from systemds.scuro.dataloader.base_loader import BaseLoader from systemds.scuro.dataloader.audio_loader import AudioLoader +from systemds.scuro.dataloader.image_loader import ImageLoader from systemds.scuro.dataloader.video_loader import VideoLoader from systemds.scuro.dataloader.text_loader import TextLoader from systemds.scuro.dataloader.json_loader import JSONLoader @@ -103,6 +104,7 @@ __all__ = [ "BaseLoader", + "ImageLoader", "AudioLoader", "VideoLoader", "TextLoader", diff --git a/src/main/python/systemds/scuro/dataloader/audio_loader.py b/src/main/python/systemds/scuro/dataloader/audio_loader.py index d8080c607d0..8b224e6f259 100644 --- a/src/main/python/systemds/scuro/dataloader/audio_loader.py +++ b/src/main/python/systemds/scuro/dataloader/audio_loader.py @@ -47,7 +47,7 @@ def extract(self, file: str, index: Optional[Union[str, List[str]]] = None): if not self.load_data_from_file: import numpy as np - self.metadata[file] = self.modality_type.create_audio_metadata( + self.metadata[file] = self.modality_type.create_metadata( 1000, np.array([0]) ) else: @@ -56,6 +56,6 @@ def extract(self, file: str, index: Optional[Union[str, List[str]]] = None): if self.normalize: audio = librosa.util.normalize(audio) - self.metadata[file] = self.modality_type.create_audio_metadata(sr, audio) + self.metadata[file] = self.modality_type.create_metadata(sr, audio) self.data.append(audio) diff --git a/src/main/python/systemds/scuro/dataloader/base_loader.py b/src/main/python/systemds/scuro/dataloader/base_loader.py index 33b418efb30..777c72d5d5c 100644 --- a/src/main/python/systemds/scuro/dataloader/base_loader.py +++ b/src/main/python/systemds/scuro/dataloader/base_loader.py @@ -34,6 +34,7 @@ def __init__( data_type: Union[np.dtype, str], chunk_size: Optional[int] = None, modality_type=None, + ext=None, ): """ Base class to load raw data for a given list of indices and stores them in the data object @@ -53,6 +54,7 @@ def __init__( self._num_chunks = 1 self._chunk_size = None self._data_type = data_type + self._ext = ext if chunk_size: self.chunk_size = chunk_size @@ -136,9 +138,10 @@ def get_file_names(self, indices=None): is_dir = True if os.path.isdir(self.source_path) else False file_names = [] if is_dir: - _, ext = os.path.splitext(os.listdir(self.source_path)[0]) + if self._ext is None: + _, self._ext = os.path.splitext(os.listdir(self.source_path)[0]) for index in self.indices if indices is None else indices: - file_names.append(self.source_path + index + ext) + file_names.append(self.source_path + index + self._ext) return file_names else: return self.source_path @@ -155,10 +158,10 @@ def file_sanity_check(file): try: file_size = os.path.getsize(file) except: - raise (f"Error: File {0} not found!".format(file)) + raise ValueError(f"Error: File {0} not found!".format(file)) if file_size == 0: - raise ("File {0} is empty".format(file)) + raise ValueError("File {0} is empty".format(file)) @staticmethod def resolve_data_type(data_type): diff --git a/src/main/python/systemds/scuro/dataloader/image_loader.py b/src/main/python/systemds/scuro/dataloader/image_loader.py new file mode 100644 index 00000000000..0667e703b12 --- /dev/null +++ b/src/main/python/systemds/scuro/dataloader/image_loader.py @@ -0,0 +1,63 @@ +# ------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# ------------------------------------------------------------- +from typing import List, Optional, Union + +import numpy as np + +from systemds.scuro.dataloader.base_loader import BaseLoader +import cv2 +from systemds.scuro.modality.type import ModalityType + + +class ImageLoader(BaseLoader): + def __init__( + self, + source_path: str, + indices: List[str], + data_type: Union[np.dtype, str] = np.float16, + chunk_size: Optional[int] = None, + load=True, + ext=".jpg", + ): + super().__init__( + source_path, indices, data_type, chunk_size, ModalityType.IMAGE, ext + ) + self.load_data_from_file = load + + def extract(self, file: str, index: Optional[Union[str, List[str]]] = None): + self.file_sanity_check(file) + + image = cv2.imread(file, cv2.IMREAD_COLOR) + image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + + if image.ndim == 2: + height, width = image.shape + channels = 1 + else: + height, width, channels = image.shape + + image = image.astype(np.float32) / 255.0 + + self.metadata[file] = self.modality_type.create_metadata( + width, height, channels + ) + + self.data.append(image) diff --git a/src/main/python/systemds/scuro/dataloader/json_loader.py b/src/main/python/systemds/scuro/dataloader/json_loader.py index a355edded89..89ba6b43d51 100644 --- a/src/main/python/systemds/scuro/dataloader/json_loader.py +++ b/src/main/python/systemds/scuro/dataloader/json_loader.py @@ -32,20 +32,28 @@ def __init__( self, source_path: str, indices: List[str], - field: str, + field: str, # TODO: make this a list so it is easier to get multiple fields from a json file. (i.e. Mustard: context + sentence) data_type: Union[np.dtype, str] = str, chunk_size: Optional[int] = None, + ext: str = ".json", ): - super().__init__(source_path, indices, data_type, chunk_size, ModalityType.TEXT) + super().__init__( + source_path, indices, data_type, chunk_size, ModalityType.TEXT, ext + ) self.field = field def extract(self, file: str, index: Optional[Union[str, List[str]]] = None): self.file_sanity_check(file) with open(file) as f: json_file = json.load(f) + + if isinstance(index, str): + index = [index] for idx in index: - sentence = json_file[idx][self.field] - self.data.append(sentence) - self.metadata[idx] = self.modality_type.create_text_metadata( - len(sentence), sentence - ) + try: + text = json_file[idx][self.field] + except: + text = json_file[self.field] + + self.data.append(text) + self.metadata[idx] = self.modality_type.create_metadata(len(text), text) diff --git a/src/main/python/systemds/scuro/dataloader/text_loader.py b/src/main/python/systemds/scuro/dataloader/text_loader.py index 6689fb6d92b..ff75a4f3180 100644 --- a/src/main/python/systemds/scuro/dataloader/text_loader.py +++ b/src/main/python/systemds/scuro/dataloader/text_loader.py @@ -43,7 +43,7 @@ def extract(self, file: str, index: Optional[Union[str, List[str]]] = None): if self.prefix: line = re.sub(self.prefix, "", line) line = line.replace("\n", "") - self.metadata[file] = self.modality_type.create_text_metadata( + self.metadata[file] = self.modality_type.create_metadata( len(line.split()), line ) self.data.append(line) diff --git a/src/main/python/systemds/scuro/dataloader/timeseries_loader.py b/src/main/python/systemds/scuro/dataloader/timeseries_loader.py index 6887d6974f2..1e6bfa8fc2c 100644 --- a/src/main/python/systemds/scuro/dataloader/timeseries_loader.py +++ b/src/main/python/systemds/scuro/dataloader/timeseries_loader.py @@ -70,12 +70,12 @@ def extract(self, file: str, index: Optional[Union[str, List[str]]] = None): data = self._normalize_signals(data) if file: - self.metadata[index] = self.modality_type.create_ts_metadata( + self.metadata[index] = self.modality_type.create_metadata( self.signal_names, data, self.sampling_rate ) else: for i, index in enumerate(self.indices): - self.metadata[str(index)] = self.modality_type.create_ts_metadata( + self.metadata[str(index)] = self.modality_type.create_metadata( self.signal_names, data[i], self.sampling_rate ) self.data.append(data) diff --git a/src/main/python/systemds/scuro/dataloader/video_loader.py b/src/main/python/systemds/scuro/dataloader/video_loader.py index 0e77d5dc57b..2c154ecbafe 100644 --- a/src/main/python/systemds/scuro/dataloader/video_loader.py +++ b/src/main/python/systemds/scuro/dataloader/video_loader.py @@ -46,7 +46,7 @@ def __init__( def extract(self, file: str, index: Optional[Union[str, List[str]]] = None): self.file_sanity_check(file) # if not self.load_data_from_file: - # self.metadata[file] = self.modality_type.create_video_metadata( + # self.metadata[file] = self.modality_type.create_metadata( # 30, 10, 100, 100, 3 # ) # else: @@ -67,7 +67,7 @@ def extract(self, file: str, index: Optional[Union[str, List[str]]] = None): height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) num_channels = 3 - self.metadata[file] = self.modality_type.create_video_metadata( + self.metadata[file] = self.modality_type.create_metadata( self.fps, length, width, height, num_channels ) diff --git a/src/main/python/systemds/scuro/modality/joined_transformed.py b/src/main/python/systemds/scuro/modality/joined_transformed.py index 078ce86f7af..442d72fe76f 100644 --- a/src/main/python/systemds/scuro/modality/joined_transformed.py +++ b/src/main/python/systemds/scuro/modality/joined_transformed.py @@ -36,7 +36,8 @@ def __init__(self, left_modality, right_modality, transformation): :param transformation: Representation to be applied on the modality """ super().__init__( - reduce(or_, [left_modality.modality_type], right_modality.modality_type), + left_modality.modality_type, + # reduce(or_, [left_modality.modality_type], right_modality.modality_type), data_type=left_modality.data_type, ) self.transformation = transformation diff --git a/src/main/python/systemds/scuro/modality/transformed.py b/src/main/python/systemds/scuro/modality/transformed.py index 7e8e54eff33..f7739f03df9 100644 --- a/src/main/python/systemds/scuro/modality/transformed.py +++ b/src/main/python/systemds/scuro/modality/transformed.py @@ -87,7 +87,8 @@ def join(self, right, join_condition): right.extract_raw_data() joined_modality = JoinedModality( - reduce(or_, [right.modality_type], self.modality_type), + self.modality_type, + # reduce(or_, [right.modality_type], self.modality_type), #TODO self, right, join_condition, @@ -136,8 +137,10 @@ def combine(self, other: Union[Modality, List[Modality]], fusion_method): fused_modality = TransformedModality( self, fusion_method, ModalityType.EMBEDDING ) + start_time = time.time() fused_modality.data = fusion_method.transform(self.create_modality_list(other)) - + end_time = time.time() + fused_modality.transform_time = end_time - start_time return fused_modality def combine_with_training( @@ -147,7 +150,12 @@ def combine_with_training( self, fusion_method, ModalityType.EMBEDDING ) modalities = self.create_modality_list(other) + start_time = time.time() fused_modality.data = fusion_method.transform_with_training(modalities, task) + end_time = time.time() + fused_modality.transform_time = ( + end_time - start_time + ) # Note: this incldues the training time return fused_modality diff --git a/src/main/python/systemds/scuro/modality/type.py b/src/main/python/systemds/scuro/modality/type.py index 382e2631ad6..c6f713df240 100644 --- a/src/main/python/systemds/scuro/modality/type.py +++ b/src/main/python/systemds/scuro/modality/type.py @@ -195,6 +195,14 @@ class ModalityType(Flag): EMBEDDING = auto() PHYSIOLOGICAL = auto() + _metadata_factory_methods = { + "TEXT": "create_text_metadata", + "AUDIO": "create_audio_metadata", + "VIDEO": "create_video_metadata", + "IMAGE": "create_image_metadata", + "TIMESERIES": "create_ts_metadata", + } + def get_schema(self): return ModalitySchemas.get(self.name) @@ -215,6 +223,27 @@ def add_field_for_instances(self, md, field, data): return md + def create_metadata(self, *args, **kwargs): + if self.name is None or "|" in self.name: + raise ValueError( + f"Composite modality types are not supported for metadata creation: {self}" + ) + + factory_methods = type(self)._metadata_factory_methods + method_name = factory_methods.value.get(self.name) + if method_name is None: + raise NotImplementedError( + f"Metadata creation not implemented for modality type: {self.name}" + ) + + method = getattr(type(self), method_name, None) + if method is None: + raise NotImplementedError( + f"Metadata creation method '{method_name}' not found for {self.name}" + ) + + return method(self, *args, **kwargs) + def create_audio_metadata(self, sampling_rate, data, is_single_instance=True): md = deepcopy(self.get_schema()) md = ModalitySchemas.update_base_metadata(md, data, is_single_instance) diff --git a/src/main/python/systemds/scuro/modality/unimodal_modality.py b/src/main/python/systemds/scuro/modality/unimodal_modality.py index 4ae1067c629..5898ea98c1f 100644 --- a/src/main/python/systemds/scuro/modality/unimodal_modality.py +++ b/src/main/python/systemds/scuro/modality/unimodal_modality.py @@ -72,7 +72,8 @@ def join(self, other, join_condition): self.data_loader.update_chunk_sizes(other.data_loader) joined_modality = JoinedModality( - reduce(or_, [other.modality_type], self.modality_type), + self.modality_type, + # reduce(or_, [other.modality_type], self.modality_type), # TODO self, other, join_condition, @@ -162,16 +163,21 @@ def apply_representation(self, representation): np.concatenate((embeddings, padding), axis=1) ) else: - padded = np.pad( - embeddings, - pad_width=( - (0, padding_needed) - if len(embeddings.shape) == 1 - else ((0, padding_needed), (0, 0)) - ), - mode="constant", - constant_values=0, - ) + if len(embeddings.shape) == 1: + padded = np.zeros( + embeddings.shape[0] + padding_needed, + dtype=embeddings.dtype, + ) + padded[: embeddings.shape[0]] = embeddings + else: + padded = np.zeros( + ( + embeddings.shape[0] + padding_needed, + embeddings.shape[1], + ), + dtype=embeddings.dtype, + ) + padded[: embeddings.shape[0], :] = embeddings padded_embeddings.append(padded) else: padded_embeddings.append(embeddings) diff --git a/src/main/python/tests/scuro/data_generator.py b/src/main/python/tests/scuro/data_generator.py index 76769065054..3c43cabb3ee 100644 --- a/src/main/python/tests/scuro/data_generator.py +++ b/src/main/python/tests/scuro/data_generator.py @@ -74,19 +74,19 @@ def create1DModality( self.modality_type = modality_type for i in range(num_instances): if modality_type == ModalityType.AUDIO: - self.metadata[i] = modality_type.create_audio_metadata( + self.metadata[i] = modality_type.create_metadata( num_features / 10, data[i] ) elif modality_type == ModalityType.TEXT: - self.metadata[i] = modality_type.create_text_metadata( + self.metadata[i] = modality_type.create_metadata( num_features / 10, data[i] ) elif modality_type == ModalityType.VIDEO: - self.metadata[i] = modality_type.create_video_metadata( + self.metadata[i] = modality_type.create_metadata( num_features / 30, 10, 0, 0, 1 ) elif modality_type == ModalityType.TIMESERIES: - self.metadata[i] = modality_type.create_ts_metadata(["test"], data[i]) + self.metadata[i] = modality_type.create_metadata(["test"], data[i]) else: raise NotImplementedError @@ -96,6 +96,7 @@ def create1DModality( return tf_modality def create_audio_data(self, num_instances, max_audio_length): + modality_type = ModalityType.AUDIO data = [ [ random.random() @@ -108,7 +109,7 @@ def create_audio_data(self, num_instances, max_audio_length): data[i] = np.array(data[i]).astype(self.data_type) metadata = { - i: ModalityType.AUDIO.create_audio_metadata(16000, np.array(data[i])) + i: modality_type.create_metadata(16000, np.array(data[i])) for i in range(num_instances) } @@ -122,7 +123,7 @@ def create_timeseries_data(self, num_instances, sequence_length, num_features=1) if num_features == 1: data = [d.squeeze(-1) for d in data] metadata = { - i: ModalityType.TIMESERIES.create_ts_metadata( + i: ModalityType.TIMESERIES.create_metadata( [f"feature_{j}" for j in range(num_features)], data[i] ) for i in range(num_instances) @@ -186,7 +187,7 @@ def create_text_data(self, num_instances): sentences.append(sentence) metadata = { - i: ModalityType.TEXT.create_text_metadata(len(sentences[i]), sentences[i]) + i: ModalityType.TEXT.create_metadata(len(sentences[i]), sentences[i]) for i in range(num_instances) } @@ -200,13 +201,13 @@ def create_visual_modality( np.random.randint( 0, 256, - (np.random.randint(1, max_num_frames + 1), height, width, 3), + (np.random.randint(10, max_num_frames + 1), height, width, 3), dtype=np.uint8, ) for _ in range(num_instances) ] metadata = { - i: ModalityType.VIDEO.create_video_metadata( + i: ModalityType.VIDEO.create_metadata( 30, data[i].shape[0], width, height, 3 ) for i in range(num_instances) @@ -222,7 +223,7 @@ def create_visual_modality( for _ in range(num_instances) ] metadata = { - i: ModalityType.IMAGE.create_image_metadata(width, height, 3) + i: ModalityType.IMAGE.create_metadata(width, height, 3) for i in range(num_instances) }