Skip to content

Commit 5ab3e4c

Browse files
committed
v0.12
Add Autocorrelation Tempogram
1 parent ad4dc55 commit 5ab3e4c

File tree

6 files changed

+198
-7
lines changed

6 files changed

+198
-7
lines changed

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
cmake_minimum_required(VERSION 3.22)
44

5-
project(PerceptoMap VERSION 0.11)
5+
project(PerceptoMap VERSION 0.12)
66

77

88
### Dependency versions ###

README.md

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -216,7 +216,14 @@ Unlike typical spectrum or spectrogram analyzers, it supports perceptual visuali
216216
</sub>
217217
</td>
218218
<td align="center" valign="top">
219-
<img src="_pics/blank_800x630.png" width="100%" alt="blank" />
219+
<img src="_pics/gui_autocorrelation_tempogram.png" width="100%" alt="Autocorrelation Tempogram" />
220+
<br/>
221+
<sub>
222+
<strong>Autocorrelation Tempogram:</strong>
223+
<i>
224+
Rhythm/tempo map from windowed autocorrelation of the onset/novelty signal, robust to phase; includes a dynamic tempo track <span style="color: gray;">[added in v0.12]</span>
225+
</i>
226+
</sub>
220227
</td>
221228
<td align="center" valign="top">
222229
<img src="_pics/blank_800x630.png" width="100%" alt="blank" />
@@ -240,7 +247,7 @@ Unlike typical spectrum or spectrogram analyzers, it supports perceptual visuali
240247
| Enhanced Mel Spectrogram with Time–Frequency Reassignment (Mel+) | ✅ Done (v0.7) | Mel-scaled spectrogram with sharper harmonic ridges and crisper transients by reassigning each STFT bin's energy to its true instantaneous frequency, then projecting onto the Mel axis | Based on the same reassignment principle as Linear+. Mapped to Mel. |
241248
| Y-axis Range Control | ✅ Done (v0.8) | Precise control over visible frequency band | Dual-handle range slider + editable min/max fields |
242249
| Fourier Tempogram | ✅ Done (v0.10) | Rhythm/tempo map showing BPM energy over time, with dynamic tempo track (Tempo Line), sharper separation of nearby tempos | Based on [[10.1109/ICASSP.2010.5495219](https://doi.org/10.1109/ICASSP.2010.5495219)]. Positive spectral flux on log-compressed STFT; STFT of the novelty with a Hann window of length `wantWinSec` seconds (default: 8s); BPM axis sampled on a log scale. Per-frame prior-weighted peak picking (log-normal prior) overlays a continuous tempo line.<br/> **Note:** the tempogram and tempo line update with a delay equal to `wantWinSec` (window accumulation). On entering this mode the FFT size will be auto-bumped to >= 4096 for a more stable onset envelope. |
243-
| Autocorrelation Tempogram | ⏳ Planned | Time-lag periodicity map (tempo strength over time, mapped to BPM) with dynamic Tempo Line, naturally highlights double/half-time relationships | - |
250+
| Autocorrelation Tempogram | ✅ Done (v0.12) | Time-lag periodicity map (tempo strength over time, mapped to BPM) with dynamic Tempo Line, robust to local phase, naturally highlights double/half-time relationships | Similar to the Fourier Tempogram, but computed via windowed autocorrelation of the onset/novelty signal. The AutoCorrelation Function (ACF) is normalized by the zero-lag term; the BPM axis is log-spaced, and a per-frame log-normal prior guides peak picking to draw the Tempo Line. Note: entering this mode auto-sets FFT size to 2048 for improved temporal resolution. |
244251
| Spectral Flatness / Contrast | ⏳ Planned | Measures of timbral characteristics | - |
245252

246253
[Back to top ↥](#perceptomap)

Source/PluginEditor.cpp

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,7 @@ SpectrogramAudioProcessorEditor::SpectrogramAudioProcessorEditor(SpectrogramAudi
154154
"- Spectral Centroid: STFT spectrogram with added curves showing where the energy is centered and how widely it is spread across frequencies.\n"
155155
"- Chroma: Chromagram showing the energy distribution across the 12 pitch classes (C to B), regardless of octave. Useful for analyzing harmonic content and key.\n"
156156
"- Fourier Tempogram: Tempo (BPM) energy vs. time computed from the onset envelope via STFT, overlays a dynamic tempo line (peak per frame with log-normal prior). Tip: Use a higher FFT size like 4096.\n"
157+
"- Autocorr Tempogram: Tempo (BPM) vs. time via autocorrelation of the onset envelope. More robust to local phase than Fourier Tempogram.\n"
157158
);
158159
spectrogramModeBox.addItem("Linear", static_cast<int>(SpectrogramComponent::SpectrogramMode::Linear));
159160
spectrogramModeBox.addItem("Linear+", static_cast<int>(SpectrogramComponent::SpectrogramMode::LinearPlus));
@@ -163,6 +164,8 @@ SpectrogramAudioProcessorEditor::SpectrogramAudioProcessorEditor(SpectrogramAudi
163164
spectrogramModeBox.addItem("Spectral Centroid", static_cast<int>(SpectrogramComponent::SpectrogramMode::LinearWithCentroid));
164165
spectrogramModeBox.addItem("Chroma", static_cast<int>(SpectrogramComponent::SpectrogramMode::Chroma));
165166
spectrogramModeBox.addItem("Fourier Tempogram", static_cast<int>(SpectrogramComponent::SpectrogramMode::FourierTempogram));
167+
spectrogramModeBox.addItem("Autocorr Tempogram", static_cast<int>(SpectrogramComponent::SpectrogramMode::AutoTempogram));
168+
166169

167170
spectrogramModeBox.setSelectedId(static_cast<int>(SpectrogramComponent::SpectrogramMode::Linear)); // default: linear
168171

@@ -461,6 +464,23 @@ void SpectrogramAudioProcessorEditor::MenuDisableControl(SpectrogramComponent::S
461464
fftSizeBox.setSelectedId(12, juce::sendNotificationSync);
462465
break;
463466
}
467+
// Autocorr Tempogram
468+
case SpectrogramComponent::SpectrogramMode::AutoTempogram:
469+
{
470+
logScaleBox.setEnabled(false);
471+
yRangeSlider.setEnabled(false);
472+
yMinHzEdit.setEnabled(false);
473+
yMaxHzEdit.setEnabled(false);
474+
floorDbSlider.setEnabled(true);
475+
noteAxisToggle.setEnabled(false);
476+
tempoAvgResetBtn.setVisible(true);
477+
// auto switch FFT size to =2048 for better tempogram results
478+
const int curOrder = fftSizeBox.getSelectedId();
479+
if (curOrder != 11) // 2^11=2048
480+
fftSizeBox.setSelectedId(11, juce::sendNotificationSync);
481+
482+
break;
483+
}
464484
// Linear STFT
465485
default:
466486
logScaleBox.setEnabled(true);

Source/SpectrogramComponent.cpp

Lines changed: 163 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -222,9 +222,10 @@ void SpectrogramComponent::pushNextFFTBlock(const float* data, size_t numSamples
222222

223223
forwardFFT.performFrequencyOnlyForwardTransform(fftData.data());
224224

225-
// Fourier tempogram
225+
// Fourier tempogram / Autocorrelation tempogram
226226
// compute spectral flux novelty and push into novelty ring buffer
227-
if (currentMode == SpectrogramMode::FourierTempogram)
227+
if (currentMode == SpectrogramMode::FourierTempogram
228+
|| currentMode == SpectrogramMode::AutoTempogram)
228229
{
229230
// current magnitude spectrum in [0 ... fftSize/2)
230231
const int nBins = fftSize / 2;
@@ -1057,6 +1058,156 @@ void SpectrogramComponent::drawFourierTempogram(int x, std::vector<float>& dBCol
10571058
}
10581059
}
10591060

1061+
void SpectrogramComponent::drawAutoTempogram(int x, std::vector<float>& dBColumn, int imageHeight)
1062+
{
1063+
if (noveltyRing.empty() || noveltyWinLen <= 2 || imageHeight <= 0)
1064+
return;
1065+
1066+
// novelty (consistent with Fourier Tempogram)
1067+
std::vector<float> seg(noveltyWinLen, 0.0f);
1068+
for (int i = 0; i < noveltyWinLen; ++i)
1069+
{
1070+
int idx = (int)noveltyWrite - 1 - i;
1071+
if (idx < 0) idx += (int)noveltyRing.size();
1072+
seg[noveltyWinLen - 1 - i] = noveltyRing[(size_t)idx];
1073+
}
1074+
1075+
float mu = 0.0f;
1076+
for (float v : seg) mu += v;
1077+
mu /= (float)noveltyWinLen;
1078+
1079+
float activity = 0.0f;
1080+
for (int i = 0; i < noveltyWinLen; ++i)
1081+
{
1082+
seg[i] = std::max(0.0f, seg[i] - mu);
1083+
activity += seg[i];
1084+
}
1085+
if (activity < activityThresh)
1086+
{
1087+
for (int y = 0; y < imageHeight; ++y)
1088+
{
1089+
dBColumn[y] = floorDb;
1090+
spectrogramImage.setPixelAt(x, y, getColourForValue(0.0f));
1091+
}
1092+
lastFourierTempoY = -1;
1093+
return;
1094+
}
1095+
1096+
// Calculate windowed autocorrelation
1097+
// ACF on seg * window, then normalize / acf[0]
1098+
const double winSum = std::accumulate(noveltyWin.begin(), noveltyWin.end(), 0.0);
1099+
const double normW = (winSum > 1e-12) ? (1.0 / winSum) : 1.0;
1100+
1101+
std::vector<float> wseg(noveltyWinLen);
1102+
for (int i = 0; i < noveltyWinLen; ++i)
1103+
wseg[i] = seg[i] * noveltyWin[i] * (float)normW;
1104+
1105+
// Precompute 0-lag energy for normalization
1106+
double acf0 = 0.0;
1107+
for (int i = 0; i < noveltyWinLen; ++i) acf0 += (double)wseg[i] * (double)wseg[i];
1108+
if (acf0 < 1e-12) acf0 = 1e-12;
1109+
1110+
// Divide the "BPM axis (logarithmically uniform)" into M discrete points and
1111+
// calculate the corresponding lag for each BPM (rounding down).
1112+
const int M = juce::jmax(8, tempoBins);
1113+
std::vector<float> tempoBPM(M), mag(M, 0.0f);
1114+
1115+
const double logMin = std::log10(juce::jmax(1.0f, tempoMinBPM));
1116+
const double logMax = std::log10(juce::jmax(tempoMinBPM + 1.0f, tempoMaxBPM));
1117+
for (int m = 0; m < M; ++m)
1118+
{
1119+
double alpha = (double)m / (double)(M - 1);
1120+
double bpm = std::pow(10.0, logMin + alpha * (logMax - logMin));
1121+
tempoBPM[m] = (float)bpm;
1122+
1123+
double periodSec = 60.0 / juce::jmax(1e-9, bpm);
1124+
int lag = (int)std::round(periodSec / juce::jmax(1e-12, noveltySamplePeriod));
1125+
1126+
// Only retain reasonable lag
1127+
if (lag >= 1 && lag < noveltyWinLen)
1128+
{
1129+
double s = 0.0;
1130+
// sum_t wseg[t] * wseg[t-lag]
1131+
for (int t = lag; t < noveltyWinLen; ++t)
1132+
s += (double)wseg[t] * (double)wseg[t - lag];
1133+
1134+
float val = (float)(s / acf0);
1135+
mag[m] = juce::jlimit(0.0f, 1.0f, val);
1136+
}
1137+
}
1138+
1139+
// Mapped to the image column
1140+
for (int y = 0; y < imageHeight; ++y)
1141+
{
1142+
float frac = 1.0f - ((float)y / (float)(imageHeight - 1));
1143+
int m = juce::jlimit(0, M - 1, (int)std::round(frac * (M - 1)));
1144+
1145+
float v = mag[m] * normFactor;
1146+
float dB = 20.0f * std::log10(v + 1e-9f);
1147+
float dbC = juce::jlimit(floorDb, 0.0f, dB);
1148+
dBColumn[y] = dbC;
1149+
1150+
float bright = juce::jmap(dbC, floorDb, 0.0f, 0.0f, 1.0f);
1151+
spectrogramImage.setPixelAt(x, y, getColourForValue(bright));
1152+
}
1153+
1154+
// Overlay tempo curve (using the same logarithmic prior as Fourier to suppress frequency ambiguity)
1155+
juce::Colour tempoLineColour;
1156+
switch (colourScheme)
1157+
{
1158+
case ColourScheme::Grayscale:
1159+
case ColourScheme::GrayscaleEnhanced: tempoLineColour = juce::Colour::fromRGB(0, 200, 255); break;
1160+
case ColourScheme::Magma:
1161+
case ColourScheme::MagmaEnhanced: tempoLineColour = juce::Colour::fromRGB(0, 255, 128); break;
1162+
default: tempoLineColour = juce::Colour::fromRGB(255, 255, 255); break;
1163+
}
1164+
1165+
int mPeak = 0;
1166+
float bestVal = -1.0e30f;
1167+
float bestBPM = tempoBPM[0];
1168+
for (int m = 0; m < M; ++m)
1169+
{
1170+
const float bpm = tempoBPM[m];
1171+
const float score = mag[m] * fourierTempoPrior(bpm);
1172+
if (score > bestVal) { bestVal = score; mPeak = m; bestBPM = bpm; }
1173+
}
1174+
1175+
const int yTempo = (int)std::round(bpmToImageY(bestBPM, imageHeight));
1176+
if (yTempo >= 0 && yTempo < imageHeight)
1177+
{
1178+
juce::Graphics imgG(spectrogramImage);
1179+
imgG.setColour(tempoLineColour.withAlpha(0.95f));
1180+
1181+
if (lastFourierTempoY >= 0)
1182+
imgG.drawLine((float)(x - 1), (float)lastFourierTempoY, (float)x, (float)yTempo, 2.0f);
1183+
else
1184+
for (int dy = -1; dy <= 1; ++dy)
1185+
spectrogramImage.setPixelAt(x, juce::jlimit(0, imageHeight - 1, yTempo + dy), tempoLineColour);
1186+
lastFourierTempoY = yTempo;
1187+
}
1188+
1189+
// BPM tempo statistics
1190+
if (bestBPM > 0.0f && std::isfinite(bestBPM))
1191+
{
1192+
// avg
1193+
tempoAvgSum += bestBPM;
1194+
tempoAvgCount += 1;
1195+
globalTempoBPM = (float)(tempoAvgSum / std::max(1, tempoAvgCount));
1196+
1197+
// min/max
1198+
tempoMin = std::min(tempoMin, bestBPM);
1199+
tempoMax = std::max(tempoMax, bestBPM);
1200+
1201+
// median
1202+
tempoHistory.push_back(bestBPM);
1203+
std::vector<float> tmp = tempoHistory;
1204+
std::sort(tmp.begin(), tmp.end());
1205+
const size_t n = tmp.size();
1206+
tempoMedian = (n % 2) ? tmp[n / 2]
1207+
: 0.5f * (tmp[n / 2 - 1] + tmp[n / 2]);
1208+
}
1209+
}
1210+
10601211
void SpectrogramComponent::drawNextLineOfSpectrogram()
10611212
{
10621213
const int imageWidth = spectrogramImage.getWidth();
@@ -1104,6 +1255,10 @@ void SpectrogramComponent::drawNextLineOfSpectrogram()
11041255
case SpectrogramMode::FourierTempogram:
11051256
drawFourierTempogram(x, dBColumn, imageHeight);
11061257
break;
1258+
case SpectrogramMode::AutoTempogram:
1259+
drawAutoTempogram(x, dBColumn, imageHeight);
1260+
break;
1261+
11071262
default:
11081263
drawLinearSpectrogram(x, dBColumn, imageHeight, maxFreq);
11091264
break;
@@ -1564,6 +1719,9 @@ void SpectrogramComponent::paint(juce::Graphics& g)
15641719
case SpectrogramMode::FourierTempogram:
15651720
paintTempoYAxis(g, width, imageHeight);
15661721
break;
1722+
case SpectrogramMode::AutoTempogram:
1723+
paintTempoYAxis(g, width, imageHeight);
1724+
break;
15671725

15681726
default:
15691727
paintSTFTYAxis(g, width, imageHeight);
@@ -1662,6 +1820,9 @@ void SpectrogramComponent::paint(juce::Graphics& g)
16621820
case SpectrogramMode::FourierTempogram:
16631821
labelText = drawTempogramTooltip(dB, imgY, imageHeight);
16641822
break;
1823+
case SpectrogramMode::AutoTempogram:
1824+
labelText = drawTempogramTooltip(dB, imgY, imageHeight);
1825+
break;
16651826

16661827
default:
16671828
labelText = drawSTFTTooltip(dB, imgY, freq);

Source/SpectrogramComponent.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,9 @@ class SpectrogramComponent : public juce::Component,
3737
Chroma,
3838
LinearPlus,
3939
MelPlus,
40-
FourierTempogram
41-
// TODO: add Autocorrelation Tempogram, Rhythm, etc.
40+
FourierTempogram,
41+
AutoTempogram
42+
// TODO: add Rhythm, etc.
4243
};
4344

4445
SpectrogramComponent();
@@ -171,6 +172,8 @@ class SpectrogramComponent : public juce::Component,
171172
void drawReassignedMelSpectrogram(int x, std::vector<float>& dBColumn, int imageHeight);
172173
// draw Fourier Tempogram
173174
void drawFourierTempogram(int x, std::vector<float>& dBColumn, int imageHeight);
175+
// draw Autocorrelation Tempogram
176+
void drawAutoTempogram(int x, std::vector<float>& dBColumn, int imageHeight);
174177

175178
SpectrogramMode currentMode = SpectrogramMode::Linear;
176179

82.2 KB
Loading

0 commit comments

Comments
 (0)