Skip to content

Commit b0a58fe

Browse files
committed
Merge branch 'feature/openai_support_voice_change' into 'main'
Add voice change support for OpenAI Demo See merge request adf/esp-webrtc-solution!17
2 parents bde3198 + 192e610 commit b0a58fe

File tree

4 files changed

+93
-5
lines changed

4 files changed

+93
-5
lines changed

solutions/openai_demo/main/common.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,16 @@ extern "C" {
2424
*/
2525
void init_board(void);
2626

27+
/**
28+
* @brief OpenAI signaling configuration
29+
*
30+
* @note For details, see: https://platform.openai.com/docs/api-reference/realtime-sessions/create#realtime-sessions-create-voice
31+
*/
32+
typedef struct {
33+
char *token; /*!< OpenAI token */
34+
char *voice; /*!< Voice to select */
35+
} openai_signaling_cfg_t;
36+
2737
/**
2838
* @brief Get OpenAI signaling implementation
2939
*

solutions/openai_demo/main/main.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,9 @@ static void thread_scheduler(const char *thread_name, media_lib_thread_cfg_t *th
177177
thread_cfg->priority = 18;
178178
thread_cfg->core_id = 1;
179179
}
180+
if (strcmp(thread_name, "start") == 0) {
181+
thread_cfg->stack_size = 6 * 1024;
182+
}
180183
if (strcmp(thread_name, "pc_send") == 0) {
181184
thread_cfg->stack_size = 4 * 1024;
182185
thread_cfg->priority = 15;

solutions/openai_demo/main/openai_signaling.c

Lines changed: 76 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,29 +13,99 @@
1313
#include "https_client.h"
1414
#include "common.h"
1515
#include "esp_log.h"
16+
#include <cJSON.h>
1617

17-
#define TAG "OPENAI_SIGNALING"
18+
#define TAG "OPENAI_SIGNALING"
1819

19-
#define OPENAI_REALTIME_URL "https://api.openai.com/v1/realtime?model=gpt-4o-mini-realtime-preview-2024-12-17"
20+
// Use the mini model for now
21+
#define OPENAI_REALTIME_MODEL "gpt-4o-mini-realtime-preview-2024-12-17"
22+
#define OPENAI_REALTIME_URL "https://api.openai.com/v1/realtime?model=" OPENAI_REALTIME_MODEL
2023

2124
/*
 * Free a heap pointer and reset it to NULL so it cannot be reused or freed twice.
 *
 * Wrapped in do { } while (0) so the macro expands to exactly one statement and
 * stays correct inside an unbraced if/else. free(NULL) is a no-op, so no guard
 * is needed. Note that `p` is evaluated twice; pass a plain lvalue.
 */
#define SAFE_FREE(p) do { \
    free(p);              \
    (p) = NULL;           \
} while (0)
2528

29+
#define GET_KEY_END(str, key) get_key_end(str, key, sizeof(key) - 1)
30+
2631
typedef struct {
2732
esp_peer_signaling_cfg_t cfg;
2833
uint8_t *remote_sdp;
2934
int remote_sdp_size;
35+
char *ephemeral_token;
3036
} openai_signaling_t;
3137

38+
/**
 * @brief  Locate `key` inside `str` and return the position just past it
 *
 * @param[in]  str  NUL-terminated text to search (NULL is tolerated)
 * @param[in]  key  NUL-terminated key to look for (NULL is tolerated)
 * @param[in]  len  Length of `key` in bytes, excluding the terminator
 *
 * @return
 *       - NULL    `str` or `key` is NULL, `len` is negative, or `key` does not occur in `str`
 *       - Others  Pointer to the first character following the matched key
 */
static char *get_key_end(char *str, char *key, int len)
{
    // strstr() on a NULL pointer is undefined behavior, so reject it up front
    if (str == NULL || key == NULL || len < 0) {
        return NULL;
    }
    char *match = strstr(str, key);
    return match ? match + len : NULL;
}
46+
47+
/**
 * @brief  HTTP response callback that extracts the ephemeral token from the session reply
 *
 * Scans the response text for `"client_secret"`, then for the `"value"` key after it,
 * and copies the quoted string that follows into `sig->ephemeral_token`.
 * The response buffer is only read, never modified.
 * On any parse or allocation failure the stored token is left unchanged.
 *
 * @note  Assumes `resp->data` is NUL-terminated text, since it is searched with
 *        string functions - TODO confirm against the https client
 *
 * @param[in]  resp  HTTP response holding the response body in `data`
 * @param[in]  ctx   The `openai_signaling_t` instance passed to `https_post`
 */
static void session_answer(http_resp_t *resp, void *ctx)
{
    openai_signaling_t *sig = (openai_signaling_t *)ctx;
    if (sig == NULL || resp == NULL || resp->data == NULL) {
        return;
    }
    char *token = GET_KEY_END((char *)resp->data, "\"client_secret\"");
    if (token == NULL) {
        return;
    }
    char *secret = GET_KEY_END(token, "\"value\"");
    if (secret == NULL) {
        return;
    }
    // Opening quote of the value string
    char *s = strchr(secret, '"');
    if (s == NULL) {
        return;
    }
    s++;
    // Closing quote; a truncated response may not contain one
    char *e = strchr(s, '"');
    if (e == NULL) {
        return;
    }
    size_t value_len = (size_t)(e - s);
    char *copy = (char *)malloc(value_len + 1);
    if (copy == NULL) {
        return;
    }
    memcpy(copy, s, value_len);
    copy[value_len] = '\0';
    // Release any token stored by an earlier invocation so it does not leak
    free(sig->ephemeral_token);
    sig->ephemeral_token = copy;
}
68+
69+
/**
 * @brief  Request an ephemeral session token from OpenAI
 *
 * Posts a JSON body containing the model, the `text` and `audio` modalities and the
 * selected voice to the realtime sessions endpoint, authorized with `token`.
 * The reply is handled by `session_answer`, which stores the result in
 * `sig->ephemeral_token`; this function itself returns nothing.
 *
 * @note  The caller inspects `sig->ephemeral_token` right after this call, so
 *        `https_post` is presumably synchronous - TODO confirm
 *
 * @param[in]  sig    Signaling instance that receives the ephemeral token
 * @param[in]  token  OpenAI API token used for the `Authorization` header
 * @param[in]  voice  Voice name to request; omitted from the request when NULL
 */
static void get_ephemeral_token(openai_signaling_t *sig, char *token, char *voice)
{
    // Without a token the Authorization header cannot be built (strlen(NULL) is UB)
    if (sig == NULL || token == NULL) {
        return;
    }
    char content_type[32] = "Content-Type: application/json";
    size_t len = strlen("Authorization: Bearer ") + strlen(token) + 1;
    char auth[len];
    snprintf(auth, len, "Authorization: Bearer %s", token);
    char *header[] = {
        content_type,
        auth,
        NULL,
    };
    cJSON *root = cJSON_CreateObject();
    if (root == NULL) {
        return;
    }
    cJSON_AddStringToObject(root, "model", OPENAI_REALTIME_MODEL);
    cJSON *modalities = cJSON_CreateArray();
    if (modalities == NULL) {
        cJSON_Delete(root);
        return;
    }
    // Attach the array first so that root owns it on every later exit path
    cJSON_AddItemToObject(root, "modalities", modalities);
    cJSON_AddItemToArray(modalities, cJSON_CreateString("text"));
    cJSON_AddItemToArray(modalities, cJSON_CreateString("audio"));
    if (voice != NULL) {
        cJSON_AddStringToObject(root, "voice", voice);
    }
    char *json_string = cJSON_Print(root);
    if (json_string) {
        https_post("https://api.openai.com/v1/realtime/sessions", header, json_string, NULL, session_answer, sig);
        free(json_string);
    }
    cJSON_Delete(root);
}
94+
3295
static int openai_signaling_start(esp_peer_signaling_cfg_t *cfg, esp_peer_signaling_handle_t *h)
3396
{
3497
openai_signaling_t *sig = (openai_signaling_t *)calloc(1, sizeof(openai_signaling_t));
3598
if (sig == NULL) {
3699
return ESP_PEER_ERR_NO_MEM;
37100
}
101+
openai_signaling_cfg_t *openai_cfg = (openai_signaling_cfg_t *)cfg->extra_cfg;
38102
sig->cfg = *cfg;
103+
// Available voices: alloy, ash, ballad, coral, echo, sage, shimmer and verse
104+
get_ephemeral_token(sig, openai_cfg->token, openai_cfg->voice ? openai_cfg->voice : "alloy");
105+
if (sig->ephemeral_token == NULL) {
106+
free(sig);
107+
return ESP_PEER_ERR_NOT_SUPPORT;
108+
}
39109
*h = sig;
40110
esp_peer_signaling_ice_info_t ice_info = {
41111
.is_initiator = true,
@@ -67,9 +137,10 @@ static int openai_signaling_send_msg(esp_peer_signaling_handle_t h, esp_peer_sig
67137
} else if (msg->type == ESP_PEER_SIGNALING_MSG_SDP) {
68138
printf("Receive local SDP\n");
69139
char content_type[32] = "Content-Type: application/sdp";
70-
int len = strlen("Authorization: Bearer ") + strlen((char *)sig->cfg.extra_cfg) + 1;
140+
char *token = sig->ephemeral_token;
141+
int len = strlen("Authorization: Bearer ") + strlen(token) + 1;
71142
char auth[len];
72-
snprintf(auth, len, "Authorization: Bearer %s", (char *)sig->cfg.extra_cfg);
143+
snprintf(auth, len, "Authorization: Bearer %s", token);
73144
char *header[] = {
74145
content_type,
75146
auth,
@@ -95,6 +166,7 @@ static int openai_signaling_stop(esp_peer_signaling_handle_t h)
95166
openai_signaling_t *sig = (openai_signaling_t *)h;
96167
sig->cfg.on_close(sig->cfg.ctx);
97168
SAFE_FREE(sig->remote_sdp);
169+
SAFE_FREE(sig->ephemeral_token);
98170
SAFE_FREE(sig);
99171
return 0;
100172
}

solutions/openai_demo/main/webrtc.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -519,6 +519,9 @@ int start_webrtc(void)
519519
esp_peer_default_cfg_t peer_cfg = {
520520
.agent_recv_timeout = 500,
521521
};
522+
openai_signaling_cfg_t openai_cfg = {
523+
.token = OPENAI_API_KEY,
524+
};
522525
esp_webrtc_cfg_t cfg = {
523526
.peer_cfg = {
524527
.audio_info = {
@@ -536,7 +539,7 @@ int start_webrtc(void)
536539
.extra_cfg = &peer_cfg,
537540
.extra_size = sizeof(peer_cfg),
538541
},
539-
.signaling_cfg.extra_cfg = OPENAI_API_KEY,
542+
.signaling_cfg.extra_cfg = &openai_cfg,
540543
.peer_impl = esp_peer_get_default_impl(),
541544
.signaling_impl = esp_signaling_get_openai_signaling(),
542545
};

0 commit comments

Comments
 (0)