-
Notifications
You must be signed in to change notification settings - Fork 841
Description
On ESP32-LyraT-Mini v1.2 I can’t get audio from the on-board mic using the ADF VAD pipeline.
With the default board config: I2C init fails (NACK) when bringing up the codec/ADC.
With the custom board config (included below): I2C init succeeds, I2S config logs look OK, but the mic stream is silence — RMS ~0 and VAD never triggers.
I tried multiple board configs, I2S ports, slot modes, sample rates, MCLK sources, and raw I2S reads (without ADF). Behavior stays the same: either NACK on I2C with defaults, or no audio with the custom config.
Hardware
Board: ESP32-LyraT-Mini v1.2
On-board devices detected via I2C scan:
0x10 (ES7243 ADC?)
0x18 (ES8311/other codec?)
Flash: 8 MB
Chip rev (boot log): v3.1
Software / Tooling
ESP-ADF: v2.7 (clean checkout)
Submodules (from git submodule status):
esp-idf: v5.3.1
esp-sr: v1.7.1-34-g394aae6
esp-adf-libs: f1b8cdb
Build system: Ninja / CMake (also reproducible with idf.py build)
Host OS: Ubuntu 22.04 (Linux)
Example used: examples/speech_recognition/vad (modified main as below)
Minimal code to reproduce
This is the app_main() I run (it prints the RMS level of each frame and runs VAD). It builds and runs, and the pipeline starts, but no actual mic signal comes through:
#include <math.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "freertos/FreeRTOS.h"
#include "esp_log.h"
#include "board.h"
#include "audio_common.h"
#include "audio_pipeline.h"
#include "i2s_stream.h"
#include "raw_stream.h"
#include "filter_resample.h"
#include "esp_vad.h"
#include "audio_idf_version.h"
/* Log tag passed to the ESP_LOG* macros and to esp_log_level_set(). */
static const char *TAG = "EXAMPLE-VAD";

/* Format of the frames handed to the VAD: sample rate (Hz) and frame duration (ms). */
#define VAD_SAMPLE_RATE_HZ 16000
#define VAD_FRAME_LENGTH_MS 30
/* Number of samples in one VAD frame: rate * duration / 1000. */
#define VAD_BUFFER_LENGTH (VAD_SAMPLE_RATE_HZ * VAD_FRAME_LENGTH_MS / 1000)
void app_main()
{
esp_log_level_set("*", ESP_LOG_WARN);
esp_log_level_set(TAG, ESP_LOG_INFO);
audio_pipeline_handle_t pipeline;
audio_element_handle_t i2s_stream_reader, filter, raw_read;
ESP_LOGI(TAG, "[ 1 ] Start codec chip");
audio_board_handle_t board_handle = audio_board_init();
audio_hal_ctrl_codec(board_handle->audio_hal, AUDIO_HAL_CODEC_MODE_BOTH, AUDIO_HAL_CTRL_START);
ESP_LOGI(TAG, "[ 2 ] Create audio pipeline for recording");
audio_pipeline_cfg_t pipeline_cfg = DEFAULT_AUDIO_PIPELINE_CONFIG();
pipeline = audio_pipeline_init(&pipeline_cfg);
mem_assert(pipeline);
ESP_LOGI(TAG, "[2.1] Create i2s stream to read audio data from codec chip");
i2s_stream_cfg_t i2s_cfg = I2S_STREAM_CFG_DEFAULT_WITH_PARA(1, 16000, 16, AUDIO_STREAM_READER);
i2s_stream_reader = i2s_stream_init(&i2s_cfg);
ESP_LOGI(TAG, "[2.2] Create filter to resample audio data");
rsp_filter_cfg_t rsp_cfg = DEFAULT_RESAMPLE_FILTER_CONFIG();
rsp_cfg.src_rate = 16000;
rsp_cfg.src_ch = 1;
rsp_cfg.dest_rate = VAD_SAMPLE_RATE_HZ;
rsp_cfg.dest_ch = 1;
filter = rsp_filter_init(&rsp_cfg);
ESP_LOGI(TAG, "[2.3] Create raw to receive data");
raw_stream_cfg_t raw_cfg = {
.out_rb_size = 8 * 1024,
.type = AUDIO_STREAM_READER,
};
raw_read = raw_stream_init(&raw_cfg);
ESP_LOGI(TAG, "[ 3 ] Register elements");
audio_pipeline_register(pipeline, i2s_stream_reader, "i2s");
audio_pipeline_register(pipeline, filter, "filter");
audio_pipeline_register(pipeline, raw_read, "raw");
ESP_LOGI(TAG, "[ 4 ] Link elements [codec]-->i2s-->filter-->raw");
const char *link_tag[3] = {"i2s", "filter", "raw"};
audio_pipeline_link(pipeline, &link_tag[0], 3);
ESP_LOGI(TAG, "[ 5 ] Start pipeline");
audio_pipeline_run(pipeline);
ESP_LOGI(TAG, "[ 6 ] Init VAD");
vad_handle_t vad_inst = vad_create(VAD_MODE_4);
int16_t *vad_buff = (int16_t *)malloc(VAD_BUFFER_LENGTH * sizeof(short));
if (!vad_buff) {
ESP_LOGE(TAG, "Memory allocation failed!");
goto abort_speech_detection;
}
while (1) {
int b_read = raw_stream_read(raw_read, (char *)vad_buff, VAD_BUFFER_LENGTH * sizeof(short));
if (b_read > 0) {
float rms = 0;
for (int i = 0; i < b_read / 2; i++) {
rms += vad_buff[i] * vad_buff[i];
}
rms = sqrtf(rms / (b_read / 2));
ESP_LOGI(TAG, "Audio level: %.2f", rms);
}
vad_state_t vad_state = vad_process(vad_inst, vad_buff, VAD_SAMPLE_RATE_HZ, VAD_FRAME_LENGTH_MS);
if (vad_state == VAD_SPEECH) {
ESP_LOGI(TAG, "Speech detected");
}
}
free(vad_buff);
vad_buff = NULL;
abort_speech_detection:
ESP_LOGI(TAG, "[ 7 ] Destroy VAD");
vad_destroy(vad_inst);
ESP_LOGI(TAG, "[ 8 ] Stop pipeline and release");
audio_pipeline_stop(pipeline);
audio_pipeline_wait_for_stop(pipeline);
audio_pipeline_terminate(pipeline);
audio_pipeline_remove_listener(pipeline);
audio_pipeline_unregister(pipeline, i2s_stream_reader);
audio_pipeline_unregister(pipeline, filter);
audio_pipeline_unregister(pipeline, raw_read);
audio_pipeline_deinit(pipeline);
audio_element_deinit(i2s_stream_reader);
audio_element_deinit(filter);
audio_element_deinit(raw_read);
}
Final board config I am using (works on I2C, but still silent)
/*
 * Custom board definition: pin assignments and codec/ADC defaults.
 *
 * Comment delimiters, the missing semicolon on the ES7243 extern and the
 * macro line continuations are restored here; every value is unchanged.
 */
#ifndef AUDIO_BOARD_DEFINITION_H
#define AUDIO_BOARD_DEFINITION_H

/** SDCARD */
#define FUNC_SDCARD_EN (1)
#define SDCARD_OPEN_FILE_NUM_MAX 5
#define SDCARD_INTR_GPIO GPIO_NUM_34
#define SDCARD_PWR_CTRL GPIO_NUM_13
#define ESP_SD_PIN_CLK GPIO_NUM_14
#define ESP_SD_PIN_CMD GPIO_NUM_15
#define ESP_SD_PIN_D0 GPIO_NUM_2
#define ESP_SD_PIN_D3 -1

/** LEDs */
#define FUNC_SYS_LEN_EN (1)
#define BLUE_LED_GPIO GPIO_NUM_27
#define GREEN_LED_GPIO GPIO_NUM_22

/** Codec / ADC */
#define FUNC_AUDIO_CODEC_EN (1)
#define ES7243_MCLK_GPIO GPIO_NUM_0
#define HEADPHONE_DETECT GPIO_NUM_19
#define PA_ENABLE_GPIO GPIO_NUM_21
#define ES8311_MCLK_SOURCE 1 /* 0: from MCLK, 1: from BCLK */
#define CODEC_ADC_I2S_PORT (1)
#define CODEC_ADC_BITS_PER_SAMPLE (16)
#define CODEC_ADC_SAMPLE_RATE (16000)
#define RECORD_HARDWARE_AEC (true)
#define BOARD_PA_GAIN (20)

extern audio_hal_func_t AUDIO_CODEC_ES8311_DEFAULT_HANDLE;
extern audio_hal_func_t AUDIO_CODEC_ES7243_DEFAULT_HANDLE;

/* Default codec configuration initializer. The expansion ends in "};" as in
 * the original text, so it is meant for use as
 * `cfg_type cfg = AUDIO_CODEC_DEFAULT_CONFIG();`. */
#define AUDIO_CODEC_DEFAULT_CONFIG(){                   \
        .adc_input = AUDIO_HAL_ADC_INPUT_LINE2,         \
        .dac_output = AUDIO_HAL_DAC_OUTPUT_LINE2,       \
        .codec_mode = AUDIO_HAL_CODEC_MODE_BOTH,        \
        .i2s_iface = {                                  \
            .mode = AUDIO_HAL_MODE_SLAVE,               \
            .fmt = AUDIO_HAL_I2S_NORMAL,                \
            .samples = AUDIO_HAL_16K_SAMPLES,           \
            .bits = AUDIO_HAL_BIT_LENGTH_16BITS,        \
        },                                              \
};

/** Buttons (unused here, left for completeness) */
#define FUNC_BUTTON_EN (1)
#define ADC_DETECT_GPIO GPIO_NUM_39
#define INPUT_KEY_NUM 6
/* ... (kept as in my project; not relevant to the audio issue) ... */

#endif
With this config, I2C init passes and I2S starts, but the mic stream is silent.
What happens (logs)
Boot & I2S bring-up (representative):
I (13) boot: ESP-IDF v5.3.1-dirty 2nd stage bootloader
I (40) boot: Partition Table: ...
I (2377) main_task: Calling app_main()
I (2397) EXAMPLE-VAD: [ 2 ] Create audio pipeline for recording
I (2397) EXAMPLE-VAD: [2.1] Create i2s stream to read audio data from codec chip
D (2397) i2s_common: rx channel is registered on I2S0 successfully
D (2407) i2s_common: DMA malloc info: dma_desc_num = 3, dma_desc_buf_size = 1248
D (2417) i2s_std: Clock division info: [sclk] 160000000 Hz [mdiv] 39 [mclk] 4096000 Hz [bdiv] 8 [bclk] 512000 Hz
D (2427) i2s_std: The rx channel on I2S0 has been initialized to STD mode successfully
I (....) EXAMPLE-VAD: [ 5 ] Start audio_pipeline
I (....) EXAMPLE-VAD: [ 6 ] Initialize VAD
I (....) EXAMPLE-VAD: Audio level: 0.00
I (....) EXAMPLE-VAD: Audio level: 0.00
... (stays near zero; no "Speech detected")
When I switch back to the default LyraT-Mini board config from ADF, init often fails earlier with I2C NACK while configuring the codec/ADC (typical message: write fail / no ack).
Independent I2C scan (separate test app) finds:
I2C device at 0x10
I2C device at 0x18
Expected behavior
The on-board mic should produce non-zero audio samples in the pipeline (RMS clearly above noise when speaking).
VAD should occasionally detect speech in front of the mic.
Actual behavior
With default board config: I2C NACK during init of audio chip(s).
With the custom config: init passes, I2S runs, but audio buffer is silence (RMS ~ 0; no VAD triggers).
What I already tried
Different I2S ports: I2S0 vs I2S1.
Slot modes: mono/stereo, 16/32-bit slots.
Sample rates: 8 kHz / 16 kHz / 44.1 kHz.
Bits per sample: 16 and 32.
ADC inputs: AUDIO_HAL_ADC_INPUT_LINE1 vs LINE2.
MCLK source toggles: from external MCLK vs from BCLK; ES7243_MCLK_GPIO set to GPIO0.
Toggled AUDIO_HAL_MODE_MASTER/SLAVE (staying consistent across I2S and codec).
Disabled resampler; tried raw I2S read path (no VAD) — still silence.
Verified device presence with an I2C scanner (0x10 and 0x18 respond).
Tried the IDF i2s_std example outside of ADF — still effectively zero-level input.
Checked PA enable, HP detect pins (not directly relevant to mic, but tested).
Rebuilt from clean, different toolchains (CLion/Ninja and idf.py), same result.