Hey there, I had to install Banquet like this:
```bash
%%writefile a.sh
git clone https://github.com/kwatcharasupat/query-bandit.git
cd query-bandit
export CONFIG_ROOT="./config"
wget https://zenodo.org/records/13694558/files/ev-pre-aug.ckpt?download=1 -O ev-pre-aug.ckpt
pip install pipreqs
pipreqs
pip uninstall librosa -y
sed -i '/^librosa/s/^/#/' requirements.txt
sed -i '/^[[:space:]]*os.makedirs/s/^/#/' /content/query-bandit/train.py
pip install -r requirements.txt
pip install librosa
```

When running inference, I am getting this:
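(I write the script with the `%%writefile` cell magic and then run it from the next Colab cell with `!bash a.sh`.)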
```
PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True python /content/query-bandit/train.py inference_byoq --ckpt_path="ev-pre-aug.ckpt" --input_path="inputAudio1.wav" --output_path="out.wav" --query_path="inputAudio2.wav" --use_cuda=true --batch_size=12
28, 'rnn_dim': 256, 'bidirectional': True, 'rnn_type': 'GRU', 'mlp_dim': 512, 'hidden_activation': 'Tanh', 'hidden_activation_kwargs': None, 'complex_mask': True, 'use_freq_weights': True, 'n_fft': 2048, 'win_length': 2048, 'hop_length': 512, 'window_fn': 'hann_window', 'wkwargs': None, 'power': None, 'center': True, 'normalized': True, 'pad_mode': 'reflect', 'onesided': True, 'fs': 44100}}, 'data': {'data_root': '${oc.env:DATA_ROOT}/moisesdb', 'cls': 'MoisesTestDataModule', 'batch_size': 1, 'effective_batch_size': None, 'num_workers': 8, 'inference_kwargs': {'chunk_size_seconds': 6.0, 'hop_size_seconds': 0.5, 'batch_size': 12, 'fs': 44100}, 'test_kwargs': {'npy_memmap': True, 'mixture_stem': 'mixture', 'use_own_query': False, 'allowed_stems': ['drums', 'lead_male_singer', 'lead_female_singer', 'background_vocals', 'bass_guitar', 'bass_synthesizer', 'fx', 'clean_electric_guitar', 'distorted_electric_guitar', 'acoustic_guitar', 'other_plucked', 'pitched_percussion', 'grand_piano', 'electric_piano', 'organ_electric_organ', 'synth_pad', 'synth_lead', 'string_section', 'other_strings', 'brass', 'reeds', 'other_wind'], 'query_file': 'query-10s'}, 'n_channels': 2}, 'trainer': {'callbacks': {'checkpoint': {'monitor': 'val/loss', 'mode': 'min', 'save_top_k': 3, 'save_last': True}}, 'max_epochs': 150, 'accumulate_grad_batches': None, 'gradient_clip_val': 10.0, 'gradient_clip_algorithm': 'norm', 'logger': {'save_dir': '${oc.env:LOG_ROOT}/e2e'}}, 'loss': {'cls': 'L1SNRLoss', 'modality': ['audio', 'spectrogram']}, 'optim': {'optimizer': {'cls': 'Adam', 'kwargs': {'lr': 0.001}}, 'scheduler': {'cls': 'StepLR', 'kwargs': {'step_size': 1, 'gamma': 0.98}}}, 'fast_run': False, 'stems': ['drums', 'lead_male_singer', 'lead_female_singer', 'background_vocals', 'bass_guitar', 'bass_synthesizer', 'fx', 'clean_electric_guitar', 'distorted_electric_guitar', 'acoustic_guitar', 'other_plucked', 'pitched_percussion', 'grand_piano', 'electric_piano', 'organ_electric_organ', 'synth_pad', 'synth_lead', 'string_section', 'other_strings', 'brass', 'reeds', 'other_wind']}
{'chunk_size_seconds': 6.0, 'hop_size_seconds': 0.5, 'batch_size': 12, 'fs': 44100}
Warning: FMAX is None setting to 15000
Loading PASST TRAINED ON OpenMIC-2008
(fc1): Linear(in_features=768, out_features=3072, bias=True)
(act): GELU(approximate='none')
(fc2): Linear(in_features=3072, out_features=768, bias=True)
(drop): Dropout(p=0.0, inplace=False)
)
)
(4): Block(
(norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
(attn): Attention(
(qkv): Linear(in_features=768, out_features=2304, bias=True)
(attn_drop): Dropout(p=0.0, inplace=False)
(proj): Linear(in_features=768, out_features=768, bias=True)
(proj_drop): Dropout(p=0.0, inplace=False)
)
(drop_path): Identity()
(norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
(mlp): Mlp(
(fc1): Linear(in_features=768, out_features=3072, bias=True)
(act): GELU(approximate='none')
(fc2): Linear(in_features=3072, out_features=768, bias=True)
(drop): Dropout(p=0.0, inplace=False)
)
)
(5): Block(
(norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
(attn): Attention(
(qkv): Linear(in_features=768, out_features=2304, bias=True)
(attn_drop): Dropout(p=0.0, inplace=False)
(proj): Linear(in_features=768, out_features=768, bias=True)
(proj_drop): Dropout(p=0.0, inplace=False)
)
(drop_path): Identity()
(norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
(mlp): Mlp(
(fc1): Linear(in_features=768, out_features=3072, bias=True)
(act): GELU(approximate='none')
(fc2): Linear(in_features=3072, out_features=768, bias=True)
(drop): Dropout(p=0.0, inplace=False)
)
)
(10): Block(
(norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
(attn): Attention(
(qkv): Linear(in_features=768, out_features=2304, bias=True)
(attn_drop): Dropout(p=0.0, inplace=False)
(proj): Linear(in_features=768, out_features=768, bias=True)
(proj_drop): Dropout(p=0.0, inplace=False)
)
(drop_path): Identity()
(norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
(mlp): Mlp(
(fc1): Linear(in_features=768, out_features=3072, bias=True)
(act): GELU(approximate='none')
(fc2): Linear(in_features=3072, out_features=768, bias=True)
(drop): Dropout(p=0.0, inplace=False)
)
)
(11): Block(
(norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
(attn): Attention(
(qkv): Linear(in_features=768, out_features=2304, bias=True)
(attn_drop): Dropout(p=0.0, inplace=False)
(proj): Linear(in_features=768, out_features=768, bias=True)
(proj_drop): Dropout(p=0.0, inplace=False)
)
(drop_path): Identity()
(norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
(mlp): Mlp(
(fc1): Linear(in_features=768, out_features=3072, bias=True)
(act): GELU(approximate='none')
(fc2): Linear(in_features=3072, out_features=768, bias=True)
(drop): Dropout(p=0.0, inplace=False)
)
)
)
(norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
(pre_logits): Identity()
(head): Sequential(
(0): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
(1): Linear(in_features=768, out_features=527, bias=True)
)
(head_dist): Linear(in_features=768, out_features=527, bias=True)
)
{'optimizer': {'cls': 'Adam', 'kwargs': {'lr': 0.001}}, 'scheduler': {'cls': 'StepLR', 'kwargs': {'step_size': 1, 'gamma': 0.98}}}
Query is longer than 10.0 seconds. Truncating.
Traceback (most recent call last):
File "/content/query-bandit/train.py", line 752, in <module>
fire.Fire()
File "/usr/local/lib/python3.11/dist-packages/fire/core.py", line 135, in Fire
component_trace = _Fire(component, args, parsed_flag_args, context, name)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.11/dist-packages/fire/core.py", line 468, in _Fire
component, remaining_args = _CallAndUpdateTrace(
^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.11/dist-packages/fire/core.py", line 684, in _CallAndUpdateTrace
component = fn(*varargs, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^
File "/content/query-bandit/train.py", line 738, in inference_byoq
out = system.chunked_inference(batch)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.11/dist-packages/torch/utils/_contextlib.py", line 116, in decorate_context
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/content/query-bandit/core/models/ebase.py", line 281, in chunked_inference
audio = F.pad(
^^^^^^
File "/usr/local/lib/python3.11/dist-packages/torch/nn/functional.py", line 5209, in pad
return torch._C._nn.pad(input, pad, mode, value)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
RuntimeError: Argument #4: Padding size should be less than the corresponding input dimension, but got: padding (485100, 1455300) at dimension 2 of input [1, 2, 661500]
```
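If I read the numbers right, the input [1, 2, 661500] is 15 seconds of stereo audio at 44100 Hz (661500 / 44100 = 15), but chunked_inference asks F.pad for 485100 samples of left padding and 1455300 samples (about 33 seconds) of right padding, and reflect-style padding requires each pad amount to be smaller than the padded dimension. Here is a standalone sketch that reproduces the same error; the "reflect" mode is my guess from the error message and the pad_mode in the config dump:

```python
import torch
import torch.nn.functional as F

# Same shape as in the traceback: 15 s of stereo audio at 44.1 kHz.
audio = torch.zeros(1, 2, 661500)

# Reflect padding needs each pad amount to be strictly smaller than the
# size of the padded dimension; 1455300 > 661500, so this raises the same
# RuntimeError as chunked_inference does above.
F.pad(audio, (485100, 1455300), mode="reflect")
```

So it looks like the padding computed for 6.0 s chunks with a 0.5 s hop ends up longer than the clip itself for an input of this length.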