Skip to content

Commit 0c34eea

Browse files
committed
Merge remote-tracking branch 'origin/feature/musicQuan' into feat/musicIoT-Bluetooth
2 parents 26c7ebd + 0ce889d commit 0c34eea

21 files changed

+2576
-337
lines changed

main/application.cc

Lines changed: 556 additions & 250 deletions
Large diffs are not rendered by default.

main/application.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,10 @@ class Application {
6363
AecMode GetAecMode() const { return aec_mode_; }
6464
void PlaySound(const std::string_view& sound);
6565
AudioService& GetAudioService() { return audio_service_; }
66+
void StartMusicStreaming(const std::string& url);
67+
void StopMusicStreaming();
68+
// New: Receive external audio data (such as music playback)
69+
void AddAudioData(AudioStreamPacket&& packet);
6670

6771
private:
6872
Application();

main/audio/audio_codec.cc

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,12 @@ void AudioCodec::Start() {
3434
output_volume_ = 10;
3535
}
3636

37+
// 保存原始输出采样率
38+
if (original_output_sample_rate_ == 0) {
39+
original_output_sample_rate_ = output_sample_rate_;
40+
ESP_LOGI(TAG, "Saved original output sample rate: %d Hz", original_output_sample_rate_);
41+
}
42+
3743
if (tx_handle_ != nullptr) {
3844
ESP_ERROR_CHECK(i2s_channel_enable(tx_handle_));
3945
}
@@ -75,3 +81,74 @@ void AudioCodec::EnableOutput(bool enable) {
7581
output_enabled_ = enable;
7682
ESP_LOGI(TAG, "Set output enable to %s", enable ? "true" : "false");
7783
}
84+
85+
bool AudioCodec::SetOutputSampleRate(int sample_rate) {
86+
// 特殊处理:如果传入 -1,表示重置到原始采样率
87+
if (sample_rate == -1) {
88+
if (original_output_sample_rate_ > 0) {
89+
sample_rate = original_output_sample_rate_;
90+
ESP_LOGI(TAG, "Resetting to original output sample rate: %d Hz", sample_rate);
91+
} else {
92+
ESP_LOGW(TAG, "Original sample rate not available, cannot reset");
93+
return false;
94+
}
95+
}
96+
97+
if (sample_rate <= 0 || sample_rate > 192000) {
98+
ESP_LOGE(TAG, "Invalid sample rate: %d", sample_rate);
99+
return false;
100+
}
101+
102+
if (output_sample_rate_ == sample_rate) {
103+
ESP_LOGI(TAG, "Sample rate already set to %d Hz", sample_rate);
104+
return true;
105+
}
106+
107+
if (tx_handle_ == nullptr) {
108+
ESP_LOGW(TAG, "TX handle is null, only updating sample rate variable");
109+
output_sample_rate_ = sample_rate;
110+
return true;
111+
}
112+
113+
ESP_LOGI(TAG, "Changing output sample rate from %d to %d Hz", output_sample_rate_, sample_rate);
114+
115+
// 先尝试禁用 I2S 通道(如果已启用的话)
116+
esp_err_t disable_ret = i2s_channel_disable(tx_handle_);
117+
if (disable_ret == ESP_OK) {
118+
ESP_LOGI(TAG, "Disabled I2S TX channel for reconfiguration");
119+
} else if (disable_ret == ESP_ERR_INVALID_STATE) {
120+
// 通道可能已经是禁用状态,这是正常的
121+
ESP_LOGI(TAG, "I2S TX channel was already disabled");
122+
} else {
123+
ESP_LOGW(TAG, "Failed to disable I2S TX channel: %s", esp_err_to_name(disable_ret));
124+
}
125+
126+
// 重新配置 I2S 时钟
127+
i2s_std_clk_config_t clk_cfg = {
128+
.sample_rate_hz = (uint32_t)sample_rate,
129+
.clk_src = I2S_CLK_SRC_DEFAULT,
130+
.mclk_multiple = I2S_MCLK_MULTIPLE_256,
131+
#ifdef I2S_HW_VERSION_2
132+
.ext_clk_freq_hz = 0,
133+
#endif
134+
};
135+
136+
esp_err_t ret = i2s_channel_reconfig_std_clock(tx_handle_, &clk_cfg);
137+
138+
// 重新启用通道(无论之前是什么状态,现在都需要启用以便播放音频)
139+
esp_err_t enable_ret = i2s_channel_enable(tx_handle_);
140+
if (enable_ret != ESP_OK) {
141+
ESP_LOGE(TAG, "Failed to enable I2S TX channel: %s", esp_err_to_name(enable_ret));
142+
} else {
143+
ESP_LOGI(TAG, "Enabled I2S TX channel");
144+
}
145+
146+
if (ret == ESP_OK) {
147+
output_sample_rate_ = sample_rate;
148+
ESP_LOGI(TAG, "Successfully changed output sample rate to %d Hz", sample_rate);
149+
return true;
150+
} else {
151+
ESP_LOGE(TAG, "Failed to change sample rate to %d Hz: %s", sample_rate, esp_err_to_name(ret));
152+
return false;
153+
}
154+
}

main/audio/audio_codec.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ class AudioCodec {
2323
virtual void SetInputGain(float gain);
2424
virtual void EnableInput(bool enable);
2525
virtual void EnableOutput(bool enable);
26+
virtual bool SetOutputSampleRate(int sample_rate);
2627

2728
virtual void OutputData(std::vector<int16_t>& data);
2829
virtual bool InputData(std::vector<int16_t>& data);
@@ -32,6 +33,7 @@ class AudioCodec {
3233
inline bool input_reference() const { return input_reference_; }
3334
inline int input_sample_rate() const { return input_sample_rate_; }
3435
inline int output_sample_rate() const { return output_sample_rate_; }
36+
inline int original_output_sample_rate() const { return original_output_sample_rate_; }
3537
inline int input_channels() const { return input_channels_; }
3638
inline int output_channels() const { return output_channels_; }
3739
inline int output_volume() const { return output_volume_; }
@@ -49,6 +51,7 @@ class AudioCodec {
4951
bool output_enabled_ = false;
5052
int input_sample_rate_ = 0;
5153
int output_sample_rate_ = 0;
54+
int original_output_sample_rate_ = 0;
5255
int input_channels_ = 1;
5356
int output_channels_ = 1;
5457
int output_volume_ = 70;

main/audio/audio_service.cc

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -634,6 +634,15 @@ void AudioService::ResetDecoder() {
634634
audio_queue_cv_.notify_all();
635635
}
636636

637+
void AudioService::UpdateOutputTimestamp() {
638+
last_output_time_ = std::chrono::steady_clock::now();
639+
// Debug: Log timestamp updates during music playback (reduce frequency)
640+
static int update_count = 0;
641+
if (update_count++ % 50 == 0) { // Log every 50 updates
642+
ESP_LOGD(TAG, "Updated output timestamp (update #%d)", update_count);
643+
}
644+
}
645+
637646
void AudioService::CheckAndUpdateAudioPowerState() {
638647
auto now = std::chrono::steady_clock::now();
639648
auto input_elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(now - last_input_time_).count();

main/audio/audio_service.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,7 @@ class AudioService {
108108
void PlaySound(const std::string_view& sound);
109109
bool ReadAudioData(std::vector<int16_t>& data, int sample_rate, int samples);
110110
void ResetDecoder();
111+
void UpdateOutputTimestamp();
111112
void SetModelsList(srmodel_list_t* models_list);
112113

113114
private:
@@ -158,4 +159,4 @@ class AudioService {
158159
void CheckAndUpdateAudioPowerState();
159160
};
160161

161-
#endif
162+
#endif

main/audio/processors/afe_audio_processor.h

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -13,33 +13,34 @@
1313
#include "audio_processor.h"
1414
#include "audio_codec.h"
1515

16-
class AfeAudioProcessor : public AudioProcessor {
16+
class AfeAudioProcessor : public AudioProcessor
17+
{
1718
public:
1819
AfeAudioProcessor();
1920
~AfeAudioProcessor();
2021

21-
void Initialize(AudioCodec* codec, int frame_duration_ms, srmodel_list_t* models_list) override;
22-
void Feed(std::vector<int16_t>&& data) override;
22+
void Initialize(AudioCodec *codec, int frame_duration_ms, srmodel_list_t *models_list) override;
23+
void Feed(std::vector<int16_t> &&data) override;
2324
void Start() override;
2425
void Stop() override;
2526
bool IsRunning() override;
26-
void OnOutput(std::function<void(std::vector<int16_t>&& data)> callback) override;
27+
void OnOutput(std::function<void(std::vector<int16_t> &&data)> callback) override;
2728
void OnVadStateChange(std::function<void(bool speaking)> callback) override;
2829
size_t GetFeedSize() override;
2930
void EnableDeviceAec(bool enable) override;
3031

3132
private:
3233
EventGroupHandle_t event_group_ = nullptr;
33-
esp_afe_sr_iface_t* afe_iface_ = nullptr;
34-
esp_afe_sr_data_t* afe_data_ = nullptr;
35-
std::function<void(std::vector<int16_t>&& data)> output_callback_;
34+
const esp_afe_sr_iface_t *afe_iface_ = nullptr;
35+
esp_afe_sr_data_t *afe_data_ = nullptr;
36+
std::function<void(std::vector<int16_t> &&data)> output_callback_;
3637
std::function<void(bool speaking)> vad_state_change_callback_;
37-
AudioCodec* codec_ = nullptr;
38+
AudioCodec *codec_ = nullptr;
3839
int frame_samples_ = 0;
3940
bool is_speaking_ = false;
4041
std::vector<int16_t> output_buffer_;
4142

4243
void AudioProcessorTask();
4344
};
4445

45-
#endif
46+
#endif

main/audio/wake_words/afe_wake_word.cc

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
#include "audio_service.h"
33

44
#include <esp_log.h>
5+
#include <esp_heap_caps.h>
6+
#include <cstring>
57
#include <sstream>
68

79
#define DETECTION_RUNNING_EVENT 1
@@ -106,10 +108,30 @@ void AfeWakeWord::Stop() {
106108
}
107109

108110
void AfeWakeWord::Feed(const std::vector<int16_t>& data) {
109-
if (afe_data_ == nullptr) {
111+
if (afe_data_ == nullptr || data.empty()) {
112+
return;
113+
}
114+
115+
// Always ensure 4-byte alignment for AFE ring buffer
116+
// std::vector doesn't guarantee alignment, so we always copy to aligned buffer
117+
size_t data_size_bytes = data.size() * sizeof(int16_t);
118+
119+
// Allocate aligned buffer (4-byte alignment, round up to multiple of 4)
120+
size_t aligned_size = (data_size_bytes + 3) & ~3; // Round up to multiple of 4
121+
int16_t* aligned_data = static_cast<int16_t*>(heap_caps_aligned_alloc(4, aligned_size, MALLOC_CAP_INTERNAL));
122+
if (aligned_data == nullptr) {
123+
ESP_LOGE(TAG, "Failed to allocate aligned buffer for AFE feed (size: %zu)", aligned_size);
110124
return;
111125
}
112-
afe_iface_->feed(afe_data_, data.data());
126+
127+
// Copy data to aligned buffer
128+
memcpy(aligned_data, data.data(), data_size_bytes);
129+
130+
// Feed aligned data to AFE
131+
afe_iface_->feed(afe_data_, aligned_data);
132+
133+
// Free aligned buffer
134+
heap_caps_free(aligned_data);
113135
}
114136

115137
size_t AfeWakeWord::GetFeedSize() {

main/audio/wake_words/afe_wake_word.h

Lines changed: 15 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -19,41 +19,42 @@
1919
#include "audio_codec.h"
2020
#include "wake_word.h"
2121

22-
class AfeWakeWord : public WakeWord {
22+
class AfeWakeWord : public WakeWord
23+
{
2324
public:
2425
AfeWakeWord();
2526
~AfeWakeWord();
2627

27-
bool Initialize(AudioCodec* codec, srmodel_list_t* models_list);
28-
void Feed(const std::vector<int16_t>& data);
29-
void OnWakeWordDetected(std::function<void(const std::string& wake_word)> callback);
28+
bool Initialize(AudioCodec *codec, srmodel_list_t *models_list);
29+
void Feed(const std::vector<int16_t> &data);
30+
void OnWakeWordDetected(std::function<void(const std::string &wake_word)> callback);
3031
void Start();
3132
void Stop();
3233
size_t GetFeedSize();
3334
void EncodeWakeWordData();
34-
bool GetWakeWordOpus(std::vector<uint8_t>& opus);
35-
const std::string& GetLastDetectedWakeWord() const { return last_detected_wake_word_; }
35+
bool GetWakeWordOpus(std::vector<uint8_t> &opus);
36+
const std::string &GetLastDetectedWakeWord() const { return last_detected_wake_word_; }
3637

3738
private:
3839
srmodel_list_t *models_ = nullptr;
39-
esp_afe_sr_iface_t* afe_iface_ = nullptr;
40-
esp_afe_sr_data_t* afe_data_ = nullptr;
41-
char* wakenet_model_ = NULL;
40+
const esp_afe_sr_iface_t *afe_iface_ = nullptr;
41+
esp_afe_sr_data_t *afe_data_ = nullptr;
42+
char *wakenet_model_ = NULL;
4243
std::vector<std::string> wake_words_;
4344
EventGroupHandle_t event_group_;
44-
std::function<void(const std::string& wake_word)> wake_word_detected_callback_;
45-
AudioCodec* codec_ = nullptr;
45+
std::function<void(const std::string &wake_word)> wake_word_detected_callback_;
46+
AudioCodec *codec_ = nullptr;
4647
std::string last_detected_wake_word_;
4748

4849
TaskHandle_t wake_word_encode_task_ = nullptr;
49-
StaticTask_t* wake_word_encode_task_buffer_ = nullptr;
50-
StackType_t* wake_word_encode_task_stack_ = nullptr;
50+
StaticTask_t *wake_word_encode_task_buffer_ = nullptr;
51+
StackType_t *wake_word_encode_task_stack_ = nullptr;
5152
std::deque<std::vector<int16_t>> wake_word_pcm_;
5253
std::deque<std::vector<uint8_t>> wake_word_opus_;
5354
std::mutex wake_word_mutex_;
5455
std::condition_variable wake_word_cv_;
5556

56-
void StoreWakeWordData(const int16_t* data, size_t size);
57+
void StoreWakeWordData(const int16_t *data, size_t size);
5758
void AudioDetectionTask();
5859
};
5960

main/boards/common/board.cc

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
#include "display/display.h"
55
#include "display/oled_display.h"
66
#include "assets/lang_config.h"
7+
#include "boards/common/esp32_music.h"
78

89
#include <esp_log.h>
910
#include <esp_ota_ops.h>
@@ -20,6 +21,12 @@ Board::Board() {
2021
settings.SetString("uuid", uuid_);
2122
}
2223
ESP_LOGI(TAG, "UUID=%s SKU=%s", uuid_.c_str(), BOARD_NAME);
24+
InitializeMusic();
25+
}
26+
27+
/**
 * @brief Create the board's music player and store it in music_.
 *
 * The Esp32Music instance is heap-allocated here; no matching delete is
 * visible in this block. NOTE(review): confirm who owns and releases it.
 */
void Board::InitializeMusic() {
    ESP_LOGI(TAG, "Initialize Music");
    auto* player = new Esp32Music();
    music_ = player;
}
2431

2532
std::string Board::GenerateUuid() {
@@ -62,6 +69,10 @@ Camera* Board::GetCamera() {
6269
return nullptr;
6370
}
6471

72+
/**
 * @brief Accessor for the board's music player.
 * @return The pointer held in music_; ownership is not transferred.
 */
Music* Board::GetMusic() {
    Music* const player = music_;
    return player;
}
75+
6576
Led* Board::GetLed() {
6677
static NoLed led;
6778
return &led;

0 commit comments

Comments
 (0)