Skip to content

Commit f0a90ec

Browse files
adding AI examples
1 parent 17190a6 commit f0a90ec

File tree

26 files changed

+6369
-427
lines changed

26 files changed

+6369
-427
lines changed

User_Setup.h

Lines changed: 399 additions & 399 deletions
Large diffs are not rendered by default.

examples/LUXE/stt_lml_tts_luxe/MuseAI.cpp

Lines changed: 469 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
#ifndef MuseAI_h
2+
#define MuseAI_h
3+
4+
#include <HTTPClient.h>
5+
#include <ArduinoJson.h>
6+
#include "settings.h"
7+
#include <ESP_I2S.h>
8+
#include <WiFi.h>
9+
#include <WiFiClientSecure.h>
10+
#include <HTTPClient.h>
11+
#include <ArduinoJson.h>
12+
#include <mbedtls/base64.h>
13+
#include <Audio.h>
14+
#include <FS.h>
15+
#include <SD.h>
16+
17+
class MuseAI {
18+
public:
19+
MuseAI(const char* apiKey);
20+
String sendMessage(String message);
21+
bool textToSpeech(String text);
22+
String speechToText(const char* audioFilePath);
23+
String speechToTextFromBuffer(uint8_t* audioBuffer, size_t bufferSize);
24+
String sendImageMessage(const char* imageFilePath, String question);
25+
26+
private:
27+
const char* _apiKey;
28+
String _buildPayload(String message);
29+
String _processResponse(String response);
30+
String _buildTTSPayload(String text);
31+
String _buildMultipartForm(const char* audioFilePath, String boundary);
32+
WiFiClientSecure _client;
33+
};
34+
35+
#if defined(ARDUINO_ARCH_ESP32) || defined(ESP32)
36+
// ---------------- Base64 helpers ----------------
37+
String muse_b64(const uint8_t *data, size_t n);
38+
size_t muse_un64(const char *s, uint8_t *dst, size_t cap);
39+
40+
// ---------------- Ring-buffer PCM ingest ----------------
41+
// Declaration only; defined outside this header.
// NOTE(review): presumably decodes the base64 string `b64str` and appends the
// bytes to the ring buffer described by ring/ringBytes/head/tail, guarded by
// `mux` - confirm against the implementation. `speakingFlag` may be null.
size_t muse_push_pcm_to_ring(const char* b64str,
                             uint8_t* ring,
                             size_t ringBytes,
                             volatile size_t* head,
                             volatile size_t* tail,
                             portMUX_TYPE* mux,
                             volatile bool* speakingFlag /* nullable */);
49+
50+
// ---------------- Speaker task ----------------
51+
struct MuseSpeakerTaskConfig {
52+
I2SClass *i2s;
53+
uint32_t rate;
54+
size_t chunkBytes;
55+
uint8_t *ring;
56+
size_t ringBytes;
57+
volatile size_t *head;
58+
volatile size_t *tail;
59+
portMUX_TYPE *mux;
60+
volatile uint32_t *pcmOut; // nullable
61+
uint16_t primeMs; // e.g., 500
62+
};
63+
64+
void muse_start_speaker_task(const MuseSpeakerTaskConfig &cfg,
65+
const char *taskName = "spk",
66+
uint32_t stack = 4096,
67+
UBaseType_t prio = 1,
68+
BaseType_t core = 1);
69+
#endif
70+
71+
#endif
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
#ifndef MUSE_SETTINGS_H
2+
#define MUSE_SETTINGS_H
3+
4+
// Central configuration for Muse AI integration
5+
// Move sensitive values out of source control for production use.
6+
7+
// Compile-time configuration strings for the Muse AI examples.
// Header-safe: every entry is an inline variable (C++17), so including this
// file from several translation units yields a single definition.
namespace MuseAISettings {

// ---- Credentials ----
// Placeholder OpenAI API key; replace before building.
inline constexpr char OPENAI_API_KEY[] = "sk-YOUR ENTER API KEY HERE";

// ---- Model identifiers ----
inline constexpr char MODEL_LLM[] = "gpt-4o-mini";
inline constexpr char MODEL_TTS[] = "tts-1";
inline constexpr char MODEL_STT[] = "gpt-4o-mini-transcribe";

// ---- OpenAI endpoints ----
inline constexpr char URL_CHAT[] = "https://api.openai.com/v1/chat/completions";
inline constexpr char URL_TTS[]  = "https://api.openai.com/v1/audio/speech";
inline constexpr char URL_STT[]  = "https://api.openai.com/v1/audio/transcriptions";

}  // namespace MuseAISettings
21+
22+
#endif // MUSE_SETTINGS_H
23+
Lines changed: 301 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,301 @@
1+
#include "settings.h"
2+
#include "MuseAI.h"
3+
#include "museWrover.h"
4+
#include "Audio.h"
5+
#include "wav_header.h"
6+
#include <ESP_I2S.h>
7+
8+
9+
extern "C" {
10+
#include "driver/gpio.h"
11+
#include "soc/gpio_sig_map.h"
12+
#include "esp_rom_gpio.h"
13+
}
14+
15+
16+
I2SClass i2s;
17+
ES8388 codec;
18+
Audio *audio = nullptr;
19+
Adafruit_NeoPixel pixels(NUMPIXELS, NEOPIXEL_PIN, NEO_GRB + NEO_KHZ800);
20+
21+
22+
#define PTT_PIN BUTTON_PAUSE
23+
#define MAX_DURATION 15
24+
#define T 1024
25+
#define RATE 16000
26+
uint8_t* wav_buffer;
27+
int max_size;
28+
#define maxVol 100
29+
int volume = 60;
30+
int microVol = 80;
31+
uint8_t phase = 0;
32+
33+
// WiFi settings
34+
const char* ssid = "xhkap";
35+
const char* password = "12345678";
36+
// OpenAI API key
37+
const char* apiKey = MuseAISettings::OPENAI_API_KEY;
38+
bool gettingResponse = false;
39+
// Initialize MuseAI instance
40+
MuseAI museAI(apiKey);
41+
42+
//Task Core 0 audio loop
43+
void audioLoop(void* x) {
44+
while (true) {
45+
if(audio != nullptr) audio->loop();
46+
47+
48+
delay(1);
49+
}
50+
}
51+
52+
void modVol(void* x) {
53+
while(true)
54+
{
55+
if(gpio_get_level(BUTTON_VOL_PLUS) == 0)
56+
{
57+
delay(50);
58+
volume += 5;
59+
if(volume > maxVol) volume = maxVol;
60+
}
61+
if(gpio_get_level(BUTTON_VOL_MINUS) == 0)
62+
{
63+
delay(50);
64+
volume -= 5;
65+
if(volume < 0) volume = 0;
66+
}
67+
delay(1);
68+
}
69+
70+
}
71+
static inline void release_mclk_pin() {
72+
#if ESP_IDF_VERSION >= ESP_IDF_VERSION_VAL(5,0,0)
73+
esp_rom_gpio_connect_out_signal((gpio_num_t)I2S_MCLK, SIG_GPIO_OUT_IDX, false, false);
74+
#else
75+
gpio_matrix_out((gpio_num_t)I2S_MCLK, SIG_GPIO_OUT_IDX, false, false);
76+
#endif
77+
gpio_reset_pin((gpio_num_t)I2S_MCLK);
78+
pinMode(I2S_MCLK, INPUT);
79+
}
80+
81+
void audioOpen() {
82+
if (!audio) {
83+
audio = new Audio();
84+
audio->setPinout(I2S_BCLK, I2S_LRCK, I2S_SDOUT, I2S_MCLK);
85+
audio->setVolume(21);
86+
}
87+
}
88+
89+
void audioClose() {
90+
if (audio) {
91+
audio->stopSong();
92+
delay(10);
93+
delete audio; // force la libération des canaux I2S internes
94+
audio = nullptr;
95+
release_mclk_pin(); // au cas où la lib laisse MCLK mappé
96+
}
97+
}
98+
99+
void setup() {
100+
// Initialize serial port
101+
Serial.begin(115200);
102+
delay(1000); // Give serial port some time to initialize
103+
104+
Serial.println("\n\n----- Voice Assistant System Starting -----");
105+
pixels.begin();
106+
pixels.setPixelColor(0, pixels.Color(255, 0, 0)); // Red ==> init
107+
pixels.show();
108+
// Connect to WiFi network
109+
WiFi.mode(WIFI_STA);
110+
WiFi.begin(ssid, password);
111+
// Reduce WiFi power-save latency
112+
WiFi.setSleep(false);
113+
Serial.println("Connecting to WiFi...");
114+
115+
int attempt = 0;
116+
while (WiFi.status() != WL_CONNECTED && attempt < 20) {
117+
Serial.print('.');
118+
delay(1000);
119+
attempt++;
120+
}
121+
122+
if (WiFi.status() == WL_CONNECTED) {
123+
Serial.println("\nWiFi connected successfully!");
124+
Serial.print("IP address: ");
125+
Serial.println(WiFi.localIP());
126+
}
127+
128+
while (not codec.begin(IIC_DATA, IIC_CLK))
129+
{
130+
Serial.printf("Failed!\n");
131+
delay(1000);
132+
}
133+
codec.volume(ES8388::ES_MAIN, 100);
134+
codec.volume(ES8388::ES_OUT1, volume);
135+
codec.ALC(false);
136+
codec.microphone_volume(microVol);
137+
// codec.select_internal_microphone();
138+
codec.write_reg(ES8388_ADDR, 12, 0x4C); //16b internal microphone
139+
140+
// power enable
141+
gpio_reset_pin(GPIO_PA_EN);
142+
gpio_set_direction(GPIO_PA_EN, GPIO_MODE_OUTPUT);
143+
gpio_set_level(GPIO_PA_EN, HIGH);
144+
145+
// PTT button
146+
gpio_reset_pin(PTT_PIN);
147+
gpio_set_direction(PTT_PIN, GPIO_MODE_INPUT);
148+
gpio_set_pull_mode(PTT_PIN, GPIO_PULLUP_ONLY);
149+
// BUTTON_VOL_MINUS
150+
gpio_reset_pin(BUTTON_VOL_MINUS);
151+
gpio_set_direction(BUTTON_VOL_MINUS, GPIO_MODE_INPUT);
152+
gpio_set_pull_mode(BUTTON_VOL_MINUS, GPIO_PULLUP_ONLY);
153+
// BUTTON_VOL_PLUS
154+
gpio_reset_pin(BUTTON_VOL_PLUS);
155+
gpio_set_direction(BUTTON_VOL_PLUS, GPIO_MODE_INPUT);
156+
gpio_set_pull_mode(BUTTON_VOL_PLUS, GPIO_PULLUP_ONLY);
157+
// i2s
158+
i2s.setPins(I2S_BCLK, I2S_LRCK, I2S_SDOUT, I2S_SDIN, I2S_MCLK);
159+
160+
// audio input buffer (MAX_DURATION)
161+
max_size = MAX_DURATION * RATE * 2 + 44;
162+
wav_buffer = (uint8_t*)ps_malloc(max_size);
163+
164+
xTaskCreatePinnedToCore(audioLoop, "audioLoop", 8192, nullptr, 4, nullptr, 0);
165+
xTaskCreatePinnedToCore(modVol, "modVol", 8192, nullptr, 4, nullptr, 1);
166+
167+
pixels.setPixelColor(0, pixels.Color(0, 0, 255)); // Blue ==> waiting for question
168+
pixels.show();
169+
Serial.println("A question?");
170+
}
171+
172+
void loop() {
173+
174+
size_t wav_size;
175+
int p, t;
176+
pcm_wav_header_t H = PCM_WAV_HEADER_DEFAULT(0, 16, RATE, 1);
177+
178+
if(phase == 0)
179+
{
180+
pixels.setPixelColor(0, pixels.Color(0, 0, 255)); // Blue ==> waiting for question
181+
pixels.show();
182+
}
183+
184+
if (gpio_get_level(PTT_PIN) == 0) {
185+
186+
pixels.setPixelColor(0, pixels.Color(0, 255, 0)); // Green ==> question
187+
pixels.show();
188+
phase = 1;
189+
audioClose();
190+
if (!i2s.begin(I2S_MODE_STD, RATE, I2S_DATA_BIT_WIDTH_16BIT, I2S_SLOT_MODE_MONO, I2S_STD_SLOT_LEFT)) {
191+
printf("Failed to initialize I2S!\n");
192+
for(;;); // do nothing
193+
}
194+
Serial.println("TALK! (for 15sec max)");
195+
p = 44;
196+
// Record audio while PTT_PIN pushed
197+
while ((gpio_get_level(PTT_PIN) == 0) && (p < max_size)) {
198+
t = i2s.readBytes((char*)wav_buffer + p, T);
199+
p += t;
200+
}
201+
// record amp
202+
uint16_t* q = (uint16_t*) wav_buffer;
203+
for(int i=0; i<p/2; i++) q[i] = q[i] << 4;
204+
205+
pixels.setPixelColor(0, pixels.Color(255, 165, 0)); // Orange ==> transfering
206+
pixels.show();
207+
208+
// adding header as a .wav record
209+
H.descriptor_chunk.chunk_size = p;
210+
H.data_chunk.subchunk_size = p - PCM_WAV_HEADER_SIZE;
211+
uint8_t* e = (uint8_t*)&H;
212+
for (int i = 0; i < 44; i++) wav_buffer[i] = e[i];
213+
wav_size = p;
214+
Serial.println(p);
215+
i2s.playWAV(wav_buffer, wav_size);
216+
i2s.end();
217+
218+
// Convert speech to text
219+
unsigned long t_ptt_release = millis();
220+
Serial.println("PTT released. Starting timing...");
221+
unsigned long t_stt_start = millis();
222+
String command = museAI.speechToTextFromBuffer(wav_buffer, wav_size);
223+
unsigned long t_stt_end = millis();
224+
Serial.print("User: ");
225+
Serial.println(command);
226+
// sending question to chatgpt
227+
Serial.println("Sending request to ChatGPT...");
228+
gettingResponse = true;
229+
unsigned long t_llm_start = millis();
230+
String response = museAI.sendMessage(command);
231+
unsigned long t_llm_end = millis();
232+
unsigned long t_tts_start = 0;
233+
unsigned long t_first_audio = 0;
234+
if (response.length() > 0) {
235+
Serial.println(response);
236+
237+
t_tts_start = millis();
238+
239+
// tts and playing answer
240+
audioOpen();
241+
codec.volume(ES8388::ES_OUT1, volume);
242+
// audio.connecttohost("http://direct.fipradio.fr/live/fip-midfi.mp3");
243+
audio->openai_speech(apiKey, "tts-1", response, "", "alloy", "mp3", "1");
244+
245+
pixels.setPixelColor(0, pixels.Color(255, 255, 0)); // Yellow ==> answering
246+
pixels.show();
247+
phase = 2;
248+
unsigned long wait_deadline = millis() + 15000;
249+
while (millis() < wait_deadline) {
250+
if (audio->isRunning()) {
251+
t_first_audio = millis();
252+
break;
253+
}
254+
delay(10);
255+
}
256+
} else
257+
Serial.println("Failed to get ChatGPT response");
258+
unsigned long stt_ms = t_stt_end - t_stt_start;
259+
H.data_chunk.subchunk_size = p - PCM_WAV_HEADER_SIZE;
260+
unsigned long llm_ms = t_llm_end - t_llm_start;
261+
unsigned long tts_to_start_ms = (t_first_audio && t_tts_start) ? (t_first_audio - t_tts_start) : 0;
262+
unsigned long total_ms = t_first_audio ? (t_first_audio - t_ptt_release) : (millis() - t_ptt_release);
263+
Serial.printf("Timing (ms): STT=%lu, LLM=%lu, TTS->first_audio=%lu, TOTAL(from PTT release)=%lu\n", stt_ms, llm_ms, tts_to_start_ms, total_ms);
264+
// waiting end of response (speech)
265+
while(phase != 0) delay(10);
266+
Serial.println("Any more questions?");
267+
}
268+
269+
}
270+
271+
void audio_id3data(const char *info) { //id3 metadata
272+
//Serial.print("id3data ");Serial.println(info);
273+
}
274+
void audio_eof_mp3(const char *info) { //end of file
275+
Serial.print("eof_mp3 ");Serial.println(info);
276+
}
277+
void audio_showstation(const char *info) {
278+
//Serial.print("station ");Serial.println(info);
279+
}
280+
void audio_showstreaminfo(const char *info) {
281+
// Serial.print("streaminfo ");Serial.println(info);
282+
}
283+
void audio_showstreamtitle(const char *info) {
284+
285+
}
286+
void audio_bitrate(const char *info) {
287+
// Serial.print("bitrate ");Serial.println(info);
288+
}
289+
void audio_commercial(const char *info) { //duration in sec
290+
// Serial.print("commercial ");Serial.println(info);
291+
}
292+
void audio_icyurl(const char *info) { //homepage
293+
// Serial.print("icyurl ");Serial.println(info);
294+
}
295+
void audio_lasthost(const char *info) { //stream URL played
296+
//Serial.print("lasthost ");Serial.println(info);
297+
}
298+
// End-of-speech hook: logs the event and returns the state machine to idle
// (phase 0), which releases the wait at the end of loop().
void audio_eof_speech(const char *info) {
  Serial.print("eof_speech ");
  Serial.println(info);

  phase = 0;
}

0 commit comments

Comments
 (0)