Skip to content

Commit 744931c

Browse files
committed
Added nodes to convert audio to spectrogram and spectrogram to audio
1 parent 1ca839e commit 744931c

File tree

5 files changed

+112
-4
lines changed

5 files changed

+112
-4
lines changed

README.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,11 @@ You can also get the node from comfy manager under the name of More math.
1818
## Features
1919

2020
- functions and variables in math expressions
21-
- conversions between int and float
21+
- Conversion between INT and FLOAT; AUDIO and IMAGE (red - real - strength of cosine of frequency; blue - imaginary - strength of sine of frequency; green - log1p of amplitude - just so it looks good to humans)
2222
- Nodes for FLOAT, CONDITIONING, LATENT, IMAGE, NOISE, AUDIO, VIDEO, MODEL, CLIP and VAE
2323

24+
- Vector Math: Support for List literals `[v1, v2, ...]` and operations between lists/scalars
25+
2426
## Operators
2527

2628
- Math: `+`, `-`, `*`, `/`, `%`, `^`, `||`
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
from comfy_api.latest import io
2+
import torch
3+
4+
# Lookup table from window name to the torch factory that builds that window.
# The 'bartlet' spelling is kept as-is because the keys are runtime strings.
windows = dict(
    bartlet=torch.bartlett_window,
    blackman=torch.blackman_window,
    hamming=torch.hamming_window,
    hann=torch.hann_window,
)
5+
6+
class AudioToSpectrogram(io.ComfyNode):
    """
    Converts Audio to an Image spectrogram.

    The short-time Fourier transform (STFT) of every audio channel is encoded
    into the three colour planes of the image:
    Red = real part, Green = log1p of the magnitude (only so the image looks
    good to humans), Blue = imaginary part.
    Each audio channel is stacked vertically.
    """
    @classmethod
    def define_schema(cls) -> io.Schema:
        """Describe the node's inputs and outputs."""
        return io.Schema(
            node_id="mrmth_AudioToImageSpectrogram",
            category="More math",
            display_name="Audio to Image Spectrogram",
            inputs=[
                io.Audio.Input(id="audio", tooltip="Input audio"),
                io.Int.Input(id="window_length", default=1024, min=16, max=4096, tooltip="Window length in samples"),
                io.Int.Input(id="hop_length", default=256, min=1, max=4096, tooltip="Stride of the window (hop length) in samples"),
                io.Int.Input(id="bucket_count", default=513, min=2, max=4096, tooltip="Number of frequency buckets (determines resolution)"),
                io.Combo.Input(id="window_type", default="hann", options=list(windows.keys()), tooltip="Type of window function to apply"),
            ],
            outputs=[
                io.Image.Output(id="image"),
                io.Int.Output(id="channel_count", display_name="Channel count", tooltip="Number of audio channels stacked vertically in the output image"),
                io.Int.Output(id="sample_rate", display_name="Sample rate", tooltip="Sample rate of the input audio"),
            ],
        )

    @classmethod
    def execute(cls, audio, window_length, hop_length, bucket_count, window_type):
        """
        Compute the STFT of ``audio`` and pack it into an image tensor.

        Args:
            audio: Mapping with a ``'waveform'`` tensor, unpacked here as
                (batch, channels, samples), and a ``'sample_rate'`` entry.
            window_length: Length of the analysis window in samples.
            hop_length: Stride between successive windows in samples.
            bucket_count: Number of one-sided frequency bins; the FFT size
                is derived from it as ``(bucket_count - 1) * 2``.
            window_type: Key into the module-level ``windows`` table.

        Returns:
            Tuple ``(image, channel_count, sample_rate)`` where ``image`` has
            shape (batch, channels * bins, frames, 3).

        Raises:
            ValueError: If ``window_length`` exceeds the FFT size implied by
                ``bucket_count``.
        """
        waveform = audio['waveform']
        sample_rate = audio['sample_rate']
        B, C, S = waveform.shape
        n_fft = (bucket_count - 1) * 2
        # torch.stft requires win_length <= n_fft; fail early with a message
        # phrased in terms of the node's own inputs.
        if window_length > n_fft:
            raise ValueError(
                f"window_length ({window_length}) must not exceed "
                f"(bucket_count - 1) * 2 = {n_fft}; lower window_length or raise bucket_count"
            )
        # Fold batch and channel together so every channel is transformed
        # independently in a single stft call.
        flat_waveform = waveform.reshape(B * C, S)
        window = windows[window_type](window_length, device=waveform.device)

        stft_out = torch.stft(
            flat_waveform, n_fft=n_fft, hop_length=hop_length, win_length=window_length,
            window=window, center=True, pad_mode='reflect', normalized=False,
            onesided=True, return_complex=True
        )

        F, T = stft_out.shape[1], stft_out.shape[2]
        # (B * C, F, T) -> (B, C * F, T): channels end up stacked along height.
        stft_out = stft_out.reshape(B, C * F, T)
        image = torch.stack([
            stft_out.real,
            torch.log1p(torch.abs(stft_out)),
            stft_out.imag
        ], dim=-1)
        return (image, C, sample_rate)
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
import torch
2+
from comfy_api.latest import io
3+
4+
# Lookup table from window name to the torch factory that builds that window.
# The 'bartlet' spelling is kept as-is because the keys are runtime strings.
windows = dict(
    bartlet=torch.bartlett_window,
    blackman=torch.blackman_window,
    hamming=torch.hamming_window,
    hann=torch.hann_window,
)
5+
6+
class SpectrogramToAudio(io.ComfyNode):
    """
    Converts an Image spectrogram back to Audio.

    Red is the real part and blue is the imaginary part of the STFT.
    Green is ignored. Audio channels are expected to be stacked vertically.
    """
    @classmethod
    def define_schema(cls) -> io.Schema:
        """Describe the node's inputs and outputs."""
        return io.Schema(
            node_id="mrmth_ImageSpectrogramToAudio",
            category="More math",
            display_name="Image Spectrogram to Audio",
            inputs=[
                io.Image.Input(id="image", tooltip="Input spectrogram image (R=Real, G=Magnitude, B=Imaginary)"),
                io.Int.Input(id="channel_count", default=1, min=1, tooltip="Number of audio channels"),
                io.Int.Input(id="sample_rate", default=44100, min=1, tooltip="Sample rate of the output audio"),
                io.Int.Input(id="window_length", default=1024, min=16, tooltip="Window length in samples"),
                io.Int.Input(id="hop_length", default=256, min=1, tooltip="Stride of the window (hop length) in samples"),
                io.Combo.Input(id="window_type", default="hann", options=list(windows.keys()), tooltip="Type of window function to apply"),
            ],
            outputs=[
                io.Audio.Output(id="audio", tooltip="Output audio"),
            ],
        )

    @classmethod
    def execute(cls, image, channel_count, sample_rate, window_length, hop_length, window_type):
        """
        Rebuild a waveform from a spectrogram image via the inverse STFT.

        Args:
            image: Tensor unpacked here as (batch, height, width, colour);
                colour index 0 is read as the real part and index 2 as the
                imaginary part.
            channel_count: Number of audio channels stacked along the height.
            sample_rate: Sample rate stored in the returned audio mapping.
            window_length: Length of the synthesis window in samples.
            hop_length: Stride between successive windows in samples.
            window_type: Key into the module-level ``windows`` table.

        Returns:
            One-element tuple holding ``{"waveform": ..., "sample_rate": ...}``
            with the waveform shaped (batch, channel_count, samples).

        Raises:
            ValueError: If the image height is not a multiple of
                ``channel_count`` or ``window_length`` exceeds the FFT size
                implied by the image height.
        """
        B, H, W, _ = image.shape
        # Floor division would silently drop rows and the reshape below would
        # then fail with an opaque size-mismatch error, so check up front.
        if H % channel_count != 0:
            raise ValueError(
                f"image height ({H}) is not divisible by channel_count ({channel_count})"
            )
        bucket_count = H // channel_count
        n_fft = (bucket_count - 1) * 2
        # torch.istft requires win_length <= n_fft.
        if window_length > n_fft:
            raise ValueError(
                f"window_length ({window_length}) must not exceed "
                f"(bucket_count - 1) * 2 = {n_fft} implied by the image height"
            )
        # Split the stacked channels apart again: (B, C * F, T) -> (B * C, F, T).
        real = image[..., 0].reshape(B * channel_count, bucket_count, W)
        imag = image[..., 2].reshape(B * channel_count, bucket_count, W)
        stft_complex = torch.complex(real, imag)
        window = windows[window_type](window_length, device=image.device)
        waveform = torch.istft(
            stft_complex, n_fft=n_fft, hop_length=hop_length, win_length=window_length,
            window=window, center=True, normalized=False, onesided=True
        )

        waveform = waveform.reshape(B, channel_count, -1)
        return ({"waveform": waveform, "sample_rate": sample_rate},)
47+

more_math/nodes.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,12 @@
66
from .ImageMathNode import ImageMathNode
77
from .AudioMathNode import AudioMathNode
88
from .VideoMathNode import VideoMathNode
9-
109
from .ModelMathNode import ModelMathNode
1110
from .VaeMathNode import VAEMathNode
1211
from .ClipMathNode import CLIPMathNode
12+
from .SpectrogramToAudioNode import SpectrogramToAudio
13+
from .AudioToSpectrogramNode import AudioToSpectrogram
14+
1315
from comfy_api.latest import ComfyExtension, io
1416

1517
class IntToFloatNode(io.ComfyNode):
@@ -76,7 +78,9 @@ async def get_node_list(self) -> list[type[io.ComfyNode]]:
7678
IntToFloatNode,
7779
FloatToIntNode,
7880
AudioMathNode,
79-
VideoMathNode
81+
VideoMathNode,
82+
AudioToSpectrogram,
83+
SpectrogramToAudio
8084
]
8185
async def comfy_entrypoint() -> MoreMathExtension:
8286
return MoreMathExtension()

pytest.ini

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
[pytest]
2-
pythonpath = d:/stability/Data/Packages/ComfyUI/custom_nodes/more_math d:/stability/Data/Packages/ComfyUI/custom_nodes/more_math/src d:/stability/Data/Packages/ComfyUI
2+
pythonpath = . d:/stability/Data/Packages/ComfyUI/custom_nodes/more_math d:/stability/Data/Packages/ComfyUI

0 commit comments

Comments
 (0)