Skip to content
Merged
83 changes: 68 additions & 15 deletions src/main/python/systemds/scuro/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,27 +24,55 @@
from systemds.scuro.dataloader.text_loader import TextLoader
from systemds.scuro.dataloader.json_loader import JSONLoader
from systemds.scuro.representations.representation import Representation
from systemds.scuro.representations.aggregate import Aggregation
from systemds.scuro.representations.aggregated_representation import (
AggregatedRepresentation,
)
from systemds.scuro.representations.average import Average
from systemds.scuro.representations.bert import Bert
from systemds.scuro.representations.bow import BoW
from systemds.scuro.representations.concatenation import Concatenation
from systemds.scuro.representations.sum import Sum
from systemds.scuro.representations.context import Context
from systemds.scuro.representations.fusion import Fusion
from systemds.scuro.representations.glove import GloVe
from systemds.scuro.representations.lstm import LSTM
from systemds.scuro.representations.max import RowMax
from systemds.scuro.representations.multiplication import Multiplication
from systemds.scuro.representations.mel_spectrogram import MelSpectrogram
from systemds.scuro.representations.mfcc import MFCC
from systemds.scuro.representations.multiplication import Multiplication
from systemds.scuro.representations.optical_flow import OpticalFlow
from systemds.scuro.representations.representation import Representation
from systemds.scuro.representations.representation_dataloader import NPY
from systemds.scuro.representations.representation_dataloader import JSON
from systemds.scuro.representations.representation_dataloader import Pickle
from systemds.scuro.representations.resnet import ResNet
from systemds.scuro.representations.bert import Bert
from systemds.scuro.representations.lstm import LSTM
from systemds.scuro.representations.bow import BoW
from systemds.scuro.representations.glove import GloVe
from systemds.scuro.representations.spectrogram import Spectrogram
from systemds.scuro.representations.sum import Sum
from systemds.scuro.representations.swin_video_transformer import SwinVideoTransformer
from systemds.scuro.representations.tfidf import TfIdf
from systemds.scuro.representations.unimodal import UnimodalRepresentation
from systemds.scuro.representations.wav2vec import Wav2Vec
from systemds.scuro.representations.window import WindowAggregation
from systemds.scuro.representations.word2vec import W2V
from systemds.scuro.representations.x3d import X3D
from systemds.scuro.models.model import Model
from systemds.scuro.models.discrete_model import DiscreteModel
from systemds.scuro.modality.joined import JoinedModality
from systemds.scuro.modality.joined_transformed import JoinedTransformedModality
from systemds.scuro.modality.modality import Modality
from systemds.scuro.modality.unimodal_modality import UnimodalModality
from systemds.scuro.modality.modality_identifier import ModalityIdentifier
from systemds.scuro.modality.transformed import TransformedModality
from systemds.scuro.modality.type import ModalityType
from systemds.scuro.aligner.dr_search import DRSearch
from systemds.scuro.aligner.task import Task
from systemds.scuro.modality.unimodal_modality import UnimodalModality
from systemds.scuro.drsearch.dr_search import DRSearch
from systemds.scuro.drsearch.task import Task
from systemds.scuro.drsearch.fusion_optimizer import FusionOptimizer
from systemds.scuro.drsearch.operator_registry import Registry
from systemds.scuro.drsearch.optimization_data import OptimizationData
from systemds.scuro.drsearch.representation_cache import RepresentationCache
from systemds.scuro.drsearch.unimodal_representation_optimizer import (
UnimodalRepresentationOptimizer,
)


__all__ = [
Expand All @@ -53,25 +81,50 @@
"VideoLoader",
"TextLoader",
"Representation",
"Aggregation",
"AggregatedRepresentation",
"Average",
"Bert",
"BoW",
"Concatenation",
"Sum",
"Context",
"Fusion",
"GloVe",
"LSTM",
"RowMax",
"Multiplication",
"MelSpectrogram",
"MFCC",
"Multiplication",
"OpticalFlow",
"Representation",
"NPY",
"JSON",
"Pickle",
"ResNet",
"Bert",
"LSTM",
"Spectrogram",
"Sum",
"BoW",
"GloVe",
"SwinVideoTransformer",
"TfIdf",
"UnimodalRepresentation",
"Wav2Vec",
"WindowAggregation",
"W2V",
"X3D",
"Model",
"DiscreteModel",
"JoinedModality",
"JoinedTransformedModality",
"Modality",
"UnimodalModality",
"ModalityIdentifier",
"TransformedModality",
"ModalityType",
"UnimodalModality",
"DRSearch",
"Task",
"FusionOptimizer",
"Registry",
"OptimizationData",
"RepresentationCache",
"UnimodalRepresentationOptimizer",
]
48 changes: 0 additions & 48 deletions src/main/python/systemds/scuro/aligner/alignment.py

This file was deleted.

11 changes: 10 additions & 1 deletion src/main/python/systemds/scuro/dataloader/audio_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,22 @@

class AudioLoader(BaseLoader):
    """Data loader that reads audio files with librosa.

    Each extracted waveform is appended to ``self.data`` and a metadata
    entry for the file is stored in ``self.metadata``.
    NOTE(review): ``data``, ``metadata``, ``modality_type`` and
    ``file_sanity_check`` are presumably provided by ``BaseLoader`` --
    confirm against the base class.
    """

    def __init__(
        self,
        source_path: str,
        indices: List[str],
        chunk_size: Optional[int] = None,
        normalize: bool = True,
    ):
        """Create an audio loader.

        :param source_path: forwarded unchanged to ``BaseLoader``
        :param indices: forwarded unchanged to ``BaseLoader``
        :param chunk_size: forwarded unchanged to ``BaseLoader``
        :param normalize: when true (the default), every loaded waveform is
            passed through ``librosa.util.normalize`` before it is stored
        """
        super().__init__(source_path, indices, chunk_size, ModalityType.AUDIO)
        self.normalize = normalize

    def extract(self, file: str, index: Optional[Union[str, List[str]]] = None):
        """Load one audio file and record its waveform and metadata.

        :param file: path of the audio file to load
        :param index: accepted as part of the signature but not used here
        """
        self.file_sanity_check(file)
        # librosa.load returns the waveform together with its sampling rate.
        audio, sr = librosa.load(file)

        if self.normalize:
            # Normalize before building the metadata so that the metadata is
            # created from the same array that ends up in self.data.
            audio = librosa.util.normalize(audio)

        self.metadata[file] = self.modality_type.create_audio_metadata(sr, audio)

        self.data.append(audio)
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
import random
from typing import List

from systemds.scuro.aligner.task import Task
from systemds.scuro.drsearch.task import Task
from systemds.scuro.modality.modality import Modality
from systemds.scuro.representations.representation import Representation

Expand Down Expand Up @@ -111,7 +111,7 @@ def fit_random(self, seed=-1):
representation = random.choice(self.representations)

modality = modality_combination[0].combine(
modality_combination[1:], representation
list(modality_combination[1:]), representation
)

scores = self.task.run(modality.data)
Expand Down
Loading
Loading