Added nodes to convert audio to a spectrogram and a spectrogram back to audio

mcDandy · mcDandy · commit 744931c6e350 · 2025-12-29T20:04:58.000+01:00
diff --git a/README.md b/README.md
@@ -18,9 +18,11 @@ You can also get the node from comfy manager under the name of More math.
 ## Features
 
 - functions and variables in math expressions
-- conversions between int and float
+- Conversion between INT and FLOAT; AUDIO and IMAGE (red - real - strength of cosine of frequency; blue - imaginary - strength of sine of frequency; green - log1p of amplitude - just so it looks good to humans)
 - Nodes for FLOAT, CONDITIONING, LATENT, IMAGE, NOISE, AUDIO, VIDEO, MODEL, CLIP and VAE
 
+- Vector Math: Support for List literals `[v1, v2, ...]` and operations between lists/scalars
+
 ## Operators
 
 - Math: `+`, `-`, `*`, `/`, `%`, `^`, `||`
diff --git a/more_math/AudioToSpectrogramNode.py b/more_math/AudioToSpectrogramNode.py
@@ -0,0 +1,55 @@
+from comfy_api.latest import io
+import torch
+
+windows = {'bartlet':torch.bartlett_window, 'blackman':torch.blackman_window, 'hamming':torch.hamming_window,'hann':torch.hann_window}  # window name -> torch window factory; the keys are the "window_type" combo options
+
+class AudioToSpectrogram(io.ComfyNode):
+    """
+    Converts Audio to an Image spectrogram.
+    Red = Real, Green = logarithm of value (just so it looks good), Blue = Imaginary.
+    Each audio channel is stacked vertically.
+    """
+    @classmethod
+    def define_schema(cls) -> io.Schema:
+        return io.Schema(
+            node_id="mrmth_AudioToImageSpectrogram",
+            category="More math",
+            display_name="Audio to Image Spectrogram",
+            inputs=[
+                io.Audio.Input(id="audio", tooltip="Input audio"),
+                io.Int.Input(id="window_length", default=1024, min=16, max=4096, tooltip="Window length in samples"),
+                io.Int.Input(id="hop_length", default=256, min=1, max=4096, tooltip="Stride of the window (hop length) in samples"),
+                io.Int.Input(id="bucket_count", default=513, min=2, max=4096, tooltip="Number of frequency buckets (determines resolution)"),
+                io.Combo.Input(id="window_type", default="hann", options=list(windows.keys()), tooltip="Type of window function to apply"),
+            ],
+            outputs=[
+                io.Image.Output(id="image"),
+                io.Int.Output(id="channel_count", display_name="Channel count", tooltip="Number of channels in the output image"),
+                io.Int.Output(id="sample_rate", display_name="Sample rate", tooltip="Sample rate of the input audio"),
+            ],
+        )
+
+    @classmethod
+    def execute(cls, audio, window_length, hop_length, bucket_count,window_type):
+        waveform = audio['waveform']
+        sample_rate = audio['sample_rate']
+        B, C, S = waveform.shape
+        n_fft = (bucket_count - 1) * 2
+        flat_waveform = waveform.reshape(B * C, S)
+        window = windows[window_type](window_length, device=waveform.device)
+
+        stft_out = torch.stft(
+            flat_waveform, n_fft=n_fft, hop_length=hop_length, win_length=window_length,
+            window=window, center=True, pad_mode='reflect', normalized=False,
+            onesided=True, return_complex=True
+        )
+
+        F, T = stft_out.shape[1], stft_out.shape[2]
+        stft_out = stft_out.reshape(B, C, F, T).reshape(B, C * F, T)
+        image = torch.stack([
+            stft_out.real,
+            torch.log1p(torch.abs(stft_out)),
+            stft_out.imag
+        ], dim=-1)
+        print(image.type())
+        return (image, C, sample_rate)
diff --git a/more_math/SpectrogramToAudioNode.py b/more_math/SpectrogramToAudioNode.py
@@ -0,0 +1,47 @@
+import torch
+from comfy_api.latest import io
+
+windows = {'bartlet':torch.bartlett_window, 'blackman':torch.blackman_window, 'hamming':torch.hamming_window,'hann':torch.hann_window}  # window name -> torch window factory; the keys are the "window_type" combo options
+
+class SpectrogramToAudio(io.ComfyNode):
+    """
+    Converts an Image spectrogram back to Audio.
+    Red is real part and blue is imaginary. Green is ignored.
+    """
+    @classmethod
+    def define_schema(cls) -> io.Schema:
+        return io.Schema(
+            node_id="mrmth_ImageSpectrogramToAudio",
+            category="More math",
+            display_name="Image Spectrogram to Audio",
+            inputs=[
+                io.Image.Input(id="image", tooltip="Input spectrogram image (R=Real, G=Magnitude, B=Imaginary)"),
+                io.Int.Input(id="channel_count", default=1, min=1, tooltip="Number of audio channels"),
+                io.Int.Input(id="sample_rate", default=44100, min=1, tooltip="Sample rate of the output audio"),
+                io.Int.Input(id="window_length", default=1024, min=16, tooltip="Window length in samples"),
+                io.Int.Input(id="hop_length", default=256, min=1, tooltip="Stride of the window (hop length) in samples"),
+                io.Combo.Input(id="window_type", default="hann", options=list(windows.keys()), tooltip="Type of window function to apply"),
+
+            ],
+            outputs=[
+                io.Audio.Output(id="audio", tooltip="Output audio"),
+            ],
+        )
+
+    @classmethod
+    def execute(cls, image, channel_count, sample_rate, window_length, hop_length,window_type):
+        B, H, W, _ = image.shape
+        bucket_count = H // channel_count
+        n_fft = (bucket_count - 1) * 2
+        real = image[..., 0].reshape(B * channel_count, bucket_count, W)
+        imag = image[..., 2].reshape(B * channel_count, bucket_count, W)
+        stft_complex = torch.complex(real, imag)
+        window = windows[window_type](window_length,device = image.device)
+        waveform = torch.istft(
+            stft_complex, n_fft=n_fft, hop_length=hop_length, win_length=window_length,
+            window=window, center=True, normalized=False, onesided=True
+        )
+
+        waveform = waveform.reshape(B, channel_count, -1)
+        return ({"waveform": waveform, "sample_rate": sample_rate},)
+
diff --git a/more_math/nodes.py b/more_math/nodes.py
@@ -6,10 +6,12 @@
 from .ImageMathNode import ImageMathNode
 from .AudioMathNode import AudioMathNode
 from .VideoMathNode import VideoMathNode
-
 from .ModelMathNode import ModelMathNode
 from .VaeMathNode import VAEMathNode
 from .ClipMathNode import CLIPMathNode
+from .SpectrogramToAudioNode import SpectrogramToAudio
+from .AudioToSpectrogramNode import AudioToSpectrogram
+
 from comfy_api.latest import ComfyExtension, io
 
 class IntToFloatNode(io.ComfyNode):
@@ -76,7 +78,9 @@ async def get_node_list(self) -> list[type[io.ComfyNode]]:
                 IntToFloatNode,
                 FloatToIntNode,
                 AudioMathNode,
-                VideoMathNode
+                VideoMathNode,
+                AudioToSpectrogram,
+                SpectrogramToAudio
             ]
 async def comfy_entrypoint() -> MoreMathExtension:
     """Create and return a new MoreMathExtension instance."""
     extension = MoreMathExtension()
     return extension
diff --git a/pytest.ini b/pytest.ini
@@ -1,2 +1,2 @@
 [pytest]
-pythonpath = d:/stability/Data/Packages/ComfyUI/custom_nodes/more_math d:/stability/Data/Packages/ComfyUI/custom_nodes/more_math/src d:/stability/Data/Packages/ComfyUI
+pythonpath = . d:/stability/Data/Packages/ComfyUI/custom_nodes/more_math d:/stability/Data/Packages/ComfyUI

Original file line number	Diff line number	Diff line change
`@@ -1,2 +1,2 @@`
`1`	`1`	`[pytest]`
`2`		`-pythonpath = d:/stability/Data/Packages/ComfyUI/custom_nodes/more_math d:/stability/Data/Packages/ComfyUI/custom_nodes/more_math/src d:/stability/Data/Packages/ComfyUI`
	`2`	`+pythonpath = . d:/stability/Data/Packages/ComfyUI/custom_nodes/more_math d:/stability/Data/Packages/ComfyUI`