Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions src/main/python/systemds/scuro/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
from systemds.scuro.representations.max import RowMax
from systemds.scuro.representations.mel_spectrogram import MelSpectrogram
from systemds.scuro.representations.mfcc import MFCC
from systemds.scuro.representations.multiplication import Multiplication
from systemds.scuro.representations.hadamard import Hadamard
from systemds.scuro.representations.optical_flow import OpticalFlow
from systemds.scuro.representations.representation import Representation
from systemds.scuro.representations.representation_dataloader import NPY
Expand All @@ -52,7 +52,7 @@
from systemds.scuro.representations.tfidf import TfIdf
from systemds.scuro.representations.unimodal import UnimodalRepresentation
from systemds.scuro.representations.wav2vec import Wav2Vec
from systemds.scuro.representations.window import WindowAggregation
from systemds.scuro.representations.window_aggregation import WindowAggregation
from systemds.scuro.representations.word2vec import W2V
from systemds.scuro.representations.x3d import X3D
from systemds.scuro.models.model import Model
Expand Down Expand Up @@ -94,7 +94,7 @@
"RowMax",
"MelSpectrogram",
"MFCC",
"Multiplication",
"Hadamard",
"OpticalFlow",
"Representation",
"NPY",
Expand Down
18 changes: 16 additions & 2 deletions src/main/python/systemds/scuro/dataloader/audio_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
from typing import List, Optional, Union

import librosa
import numpy as np

from systemds.scuro.dataloader.base_loader import BaseLoader
from systemds.scuro.modality.type import ModalityType

Expand All @@ -30,15 +32,27 @@ def __init__(
self,
source_path: str,
indices: List[str],
data_type: Union[np.dtype, str] = np.float32,
chunk_size: Optional[int] = None,
normalize: bool = True,
load=True,
):
super().__init__(source_path, indices, chunk_size, ModalityType.AUDIO)
super().__init__(
source_path, indices, data_type, chunk_size, ModalityType.AUDIO
)
self.normalize = normalize
self.load_data_from_file = load

def extract(self, file: str, index: Optional[Union[str, List[str]]] = None):
self.file_sanity_check(file)
audio, sr = librosa.load(file)
# if not self.load_data_from_file:
# import numpy as np
#
# self.metadata[file] = self.modality_type.create_audio_metadata(
# 1000, np.array([0])
# )
# else:
audio, sr = librosa.load(file, dtype=self._data_type)

if self.normalize:
audio = librosa.util.normalize(audio)
Expand Down
63 changes: 56 additions & 7 deletions src/main/python/systemds/scuro/dataloader/base_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,17 @@
import os
from abc import ABC, abstractmethod
from typing import List, Optional, Union
import math

import numpy as np


class BaseLoader(ABC):
def __init__(
self,
source_path: str,
indices: List[str],
data_type: Union[np.dtype, str],
chunk_size: Optional[int] = None,
modality_type=None,
):
Expand All @@ -48,6 +52,7 @@ def __init__(
self._next_chunk = 0
self._num_chunks = 1
self._chunk_size = None
self._data_type = data_type

if chunk_size:
self.chunk_size = chunk_size
Expand All @@ -59,7 +64,7 @@ def chunk_size(self):
@chunk_size.setter
def chunk_size(self, value):
    """
    Set the number of indices per chunk and recompute the number of chunks
    :param value: Number of indices that make up one chunk
    """
    self._chunk_size = value
    # Round up so that a trailing, partially filled chunk is counted as well
    self._num_chunks = int(math.ceil(len(self.indices) / self._chunk_size))

@property
def num_chunks(self):
Expand All @@ -69,6 +74,14 @@ def num_chunks(self):
def next_chunk(self):
return self._next_chunk

@property
def data_type(self):
    """
    Data type currently stored on this loader
    """
    return self._data_type

@data_type.setter
def data_type(self, data_type):
    """
    Pass the given data type through resolve_data_type and store the result
    :param data_type: Data type specification to validate and store
    """
    resolved = self.resolve_data_type(data_type)
    self._data_type = resolved

def reset(self):
self._next_chunk = 0
self.data = []
Expand Down Expand Up @@ -110,16 +123,25 @@ def _load_next_chunk(self):
return self._load(next_chunk_indices)

def _load(self, indices: List[str]):
is_dir = True if os.path.isdir(self.source_path) else False
file_names = self.get_file_names(indices)
if isinstance(file_names, str):
self.extract(file_names, indices)
else:
for file_name in file_names:
self.extract(file_name)

return self.data, self.metadata

def get_file_names(self, indices=None):
    """
    Resolve the file(s) that hold the data for the given indices
    :param indices: Indices to resolve; all indices of the loader if None
    :return: List with one file path per index if the source path is a directory,
        otherwise the source path itself
    """
    if not os.path.isdir(self.source_path):
        return self.source_path

    selected = self.indices if indices is None else indices
    # The extension of the first directory entry is applied to every index
    _, ext = os.path.splitext(os.listdir(self.source_path)[0])
    # Paths are built by plain concatenation, so the source path has to end with
    # a path separator
    return [self.source_path + index + ext for index in selected]

@abstractmethod
def extract(self, file: str, index: Optional[Union[str, List[str]]] = None):
Expand All @@ -137,3 +159,30 @@ def file_sanity_check(file):

if file_size == 0:
raise ("File {0} is empty".format(file))

@staticmethod
def resolve_data_type(data_type):
    """
    Validate a data type specification
    :param data_type: Name of a supported numeric type (matched case-insensitively),
        one of the supported numpy types, or str
    :return: np.dtype for a supported type name, otherwise data_type unchanged
    :raises ValueError: If data_type is not one of the supported types
    """
    supported_names = ("float16", "float32", "float64", "int16", "int32", "int64")
    if isinstance(data_type, str):
        name = data_type.lower()
        if name not in supported_names:
            raise ValueError(f"Unsupported data_type string: {data_type}")
        # Build the dtype from the normalized name: the check above ignores case,
        # but np.dtype may not accept a differently cased name such as "FLOAT32"
        return np.dtype(name)

    supported_types = [
        np.float16,
        np.float32,
        np.float64,
        np.int16,
        np.int32,
        np.int64,
        str,
    ]
    # Membership is decided with ==, not identity
    if data_type in supported_types:
        return data_type
    raise ValueError(f"Unsupported data_type: {data_type}")
5 changes: 4 additions & 1 deletion src/main/python/systemds/scuro/dataloader/json_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
# -------------------------------------------------------------
import json

import numpy as np

from systemds.scuro.modality.type import ModalityType
from systemds.scuro.dataloader.base_loader import BaseLoader
from typing import Optional, List, Union
Expand All @@ -31,9 +33,10 @@ def __init__(
source_path: str,
indices: List[str],
field: str,
data_type: Union[np.dtype, str] = str,
chunk_size: Optional[int] = None,
):
super().__init__(source_path, indices, chunk_size, ModalityType.TEXT)
super().__init__(source_path, indices, data_type, chunk_size, ModalityType.TEXT)
self.field = field

def extract(self, file: str, index: Optional[Union[str, List[str]]] = None):
Expand Down
3 changes: 2 additions & 1 deletion src/main/python/systemds/scuro/dataloader/text_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,11 @@ def __init__(
self,
source_path: str,
indices: List[str],
data_type: str = str,
chunk_size: Optional[int] = None,
prefix: Optional[Pattern[str]] = None,
):
super().__init__(source_path, indices, chunk_size, ModalityType.TEXT)
super().__init__(source_path, indices, data_type, chunk_size, ModalityType.TEXT)
self.prefix = prefix

def extract(self, file: str, index: Optional[Union[str, List[str]]] = None):
Expand Down
16 changes: 13 additions & 3 deletions src/main/python/systemds/scuro/dataloader/video_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,22 @@ def __init__(
self,
source_path: str,
indices: List[str],
data_type: Union[np.dtype, str] = np.float16,
chunk_size: Optional[int] = None,
load=True,
):
super().__init__(source_path, indices, chunk_size, ModalityType.VIDEO)
super().__init__(
source_path, indices, data_type, chunk_size, ModalityType.VIDEO
)
self.load_data_from_file = load

def extract(self, file: str, index: Optional[Union[str, List[str]]] = None):
self.file_sanity_check(file)
# if not self.load_data_from_file:
# self.metadata[file] = self.modality_type.create_video_metadata(
# 30, 10, 100, 100, 3
# )
# else:
cap = cv2.VideoCapture(file)

if not cap.isOpened():
Expand All @@ -60,8 +70,8 @@ def extract(self, file: str, index: Optional[Union[str, List[str]]] = None):
if not ret:
break
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
frame = frame.astype(np.float32) / 255.0
frame = frame.astype(self._data_type) / 255.0

frames.append(frame)

self.data.append(frames)
self.data.append(np.stack(frames))
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ def get_representations(self, modality: ModalityType):
return self._representations[modality]

def get_context_operators(self):
    """
    Return the context operators held by this instance
    """
    # TODO: return modality specific context operations
    operators = self._context_operators
    return operators

def get_fusion_operators(self):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,8 @@ def load_from_cache(self, modality, operators):
metadata = pickle.load(f)

transformed_modality = TransformedModality(
modality.modality_type, op_names, modality.modality_id, metadata
modality,
op_names,
)
data = None
with open(f"{filename}.pkl", "rb") as f:
Expand Down
4 changes: 2 additions & 2 deletions src/main/python/systemds/scuro/modality/joined_transformed.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@

from systemds.scuro.modality.modality import Modality
from systemds.scuro.representations.utils import pad_sequences
from systemds.scuro.representations.window import WindowAggregation
from systemds.scuro.representations.window_aggregation import WindowAggregation


class JoinedTransformedModality(Modality):
Expand Down Expand Up @@ -70,7 +70,7 @@ def combine(self, fusion_method):
self.data = pad_sequences(self.data)
return self

def window(self, window_size, aggregation):
def window_aggregation(self, window_size, aggregation):
w = WindowAggregation(window_size, aggregation)
self.left_modality.data = w.execute(self.left_modality)
self.right_modality.data = w.execute(self.right_modality)
Expand Down
10 changes: 7 additions & 3 deletions src/main/python/systemds/scuro/modality/modality.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,9 @@

class Modality:

def __init__(self, modalityType: ModalityType, modality_id=-1, metadata={}):
def __init__(
self, modalityType: ModalityType, modality_id=-1, metadata={}, data_type=None
):
"""
Parent class of the different Modalities (unimodal & multimodal)
:param modality_type: Type of the modality
Expand All @@ -38,7 +40,7 @@ def __init__(self, modalityType: ModalityType, modality_id=-1, metadata={}):
self.schema = modalityType.get_schema()
self.metadata = metadata
self.data = []
self.data_type = None
self.data_type = data_type
self.cost = None
self.shape = None
self.modality_id = modality_id
Expand Down Expand Up @@ -67,7 +69,9 @@ def copy_from_instance(self):
"""
Create a copy of the modality instance
"""
return type(self)(self.modality_type, self.metadata)
return type(self)(
self.modality_type, self.modality_id, self.metadata, self.data_type
)

def update_metadata(self):
"""
Expand Down
31 changes: 14 additions & 17 deletions src/main/python/systemds/scuro/modality/transformed.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,24 +24,28 @@
from systemds.scuro.modality.type import ModalityType
from systemds.scuro.modality.joined import JoinedModality
from systemds.scuro.modality.modality import Modality
from systemds.scuro.representations.window import WindowAggregation
from systemds.scuro.representations.window_aggregation import WindowAggregation


class TransformedModality(Modality):

def __init__(self, modality, transformation, new_modality_type=None):
    """
    Modality that results from applying a transformation to another modality
    :param modality: Modality the transformation is applied on; its id, data type
        and a copy of its metadata are passed on to the parent constructor
    :param transformation: Representation to be applied on the modality
    :param new_modality_type: Type of the resulting modality; the type of the
        given modality is used if None
    """
    if new_modality_type is None:
        new_modality_type = modality.modality_type

    # Hand over a copy rather than the metadata object of the source modality
    metadata = modality.metadata.copy() if modality.metadata is not None else None
    super().__init__(
        new_modality_type, modality.modality_id, metadata, modality.data_type
    )
    self.transformation = transformation

def copy_from_instance(self):
    """
    Create a new instance of the same class from this modality
    :return: Instance of type(self) constructed from this modality and its
        transformation
    """
    return type(self)(self, self.transformation)

def join(self, right, join_condition):
chunked_execution = False
Expand All @@ -65,19 +69,15 @@ def join(self, right, join_condition):

return joined_modality

def window_aggregation(self, windowSize, aggregation):
    """
    Apply a window aggregation to this modality
    :param windowSize: Window size passed to WindowAggregation
    :param aggregation: Aggregation passed to WindowAggregation
    :return: TransformedModality whose data is the output of the aggregation
    """
    w = WindowAggregation(windowSize, aggregation)
    transformed_modality = TransformedModality(self, w)
    transformed_modality.data = w.execute(self)

    return transformed_modality

def context(self, context_operator):
    """
    Apply a context operator to this modality
    :param context_operator: Operator whose execute method is called with this modality
    :return: TransformedModality whose data is the output of the operator
    """
    transformed_modality = TransformedModality(self, context_operator)

    transformed_modality.data = context_operator.execute(self)
    return transformed_modality
Expand All @@ -94,10 +94,7 @@ def combine(self, other, fusion_method):
:param fusion_method: The fusion method to be used to combine modalities
"""
fused_modality = TransformedModality(
ModalityType.EMBEDDING,
fusion_method,
self.modality_id,
self.metadata,
self, fusion_method, ModalityType.EMBEDDING
)
modalities = [self]
if isinstance(other, list):
Expand Down
8 changes: 8 additions & 0 deletions src/main/python/systemds/scuro/modality/type.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,14 @@ def get_schema(self):
def update_metadata(self, md, data):
return ModalitySchemas.update_metadata(self.name, md, data)

def add_alignment(self, md, alignment_timestamps):
    """
    Store alignment timestamps in the given metadata
    :param md: Metadata to update in place
    :param alignment_timestamps: Value stored under the "alignment_timestamps" key
    :return: The metadata object that was passed in
    """
    key = "alignment_timestamps"
    md[key] = alignment_timestamps
    return md

def add_field(self, md, field, data):
    """
    Store a value under an arbitrary key of the given metadata
    :param md: Metadata to update in place
    :param field: Key to write
    :param data: Value stored under the key
    :return: The metadata object that was passed in
    """
    md.__setitem__(field, data)
    return md

def create_audio_metadata(self, sampling_rate, data):
md = deepcopy(self.get_schema())
md = ModalitySchemas.update_base_metadata(md, data, True)
Expand Down
Loading
Loading