Skip to content

Commit 076e7d3

Browse files
committed
Implement OpenRouter TTS
1 parent 7cb5ebe commit 076e7d3

File tree

4 files changed

+315
-3
lines changed

4 files changed

+315
-3
lines changed

packages/i18n/src/locales/en/settings.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -754,6 +754,9 @@ pages:
754754
openrouter:
755755
description: openrouter.ai
756756
title: OpenRouter
757+
openrouter-audio-speech:
758+
description: openrouter.ai
759+
title: OpenRouter
757760
perplexity:
758761
description: perplexity.ai
759762
title: Perplexity
Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
<script setup lang="ts">
import type { SpeechProvider } from '@xsai-ext/providers/utils'

import {
  SpeechPlayground,
  SpeechProviderSettings,
} from '@proj-airi/stage-ui/components'
import { useSpeechStore } from '@proj-airi/stage-ui/stores/modules/speech'
import { useProvidersStore } from '@proj-airi/stage-ui/stores/providers'
import { Callout, Select } from '@proj-airi/ui'
import { storeToRefs } from 'pinia'
import { computed, onMounted, watch } from 'vue'

// Settings page for OpenRouter text-to-speech (audio-capable chat models).
const providerId = 'openrouter-audio-speech'
const defaultModel = 'openai/gpt-audio-mini'

const speechStore = useSpeechStore()
const providersStore = useProvidersStore()
const { providers } = storeToRefs(providersStore)

// Selected model, persisted on the provider's config entry.
// Reading falls back to the default audio model when nothing is stored yet.
const model = computed({
  get: () => {
    const stored = providers.value[providerId]?.model as string | undefined
    return stored || defaultModel
  },
  set: (value) => {
    if (!providers.value[providerId])
      providers.value[providerId] = {}
    providers.value[providerId].model = value
  },
})

const providerModels = computed(() => providersStore.getModelsForProvider(providerId))
const isLoadingModels = computed(() => providersStore.isLoadingModels[providerId] || false)
const apiKeyConfigured = computed(() => Boolean(providers.value[providerId]?.apiKey))
const availableVoices = computed(() => speechStore.availableVoices[providerId] || [])

// Options for the model <Select>, derived from the provider's model catalog.
const modelOptions = computed(() => providerModels.value.map(m => ({ value: m.id, label: m.name })))

onMounted(async () => {
  await providersStore.fetchModelsForProvider(providerId)
  await speechStore.loadVoicesForProvider(providerId)
})

/**
 * Generate speech for the playground. The SSML flag is accepted for interface
 * compatibility but ignored by this provider.
 */
async function generateSpeech(input: string, voiceId: string, _useSSML: boolean) {
  const provider = await providersStore.getProviderInstance<SpeechProvider<string>>(providerId)
  if (!provider)
    throw new Error('Failed to initialize speech provider')

  return await speechStore.speech(
    provider,
    model.value || defaultModel,
    input,
    voiceId,
  )
}

// Mirror the model selection onto the provider config object as well.
// NOTE(review): this looks redundant with the computed setter above —
// presumably getProviderConfig returns the same reactive entry; confirm.
watch(model, async () => {
  const providerConfig = providersStore.getProviderConfig(providerId)
  providerConfig.model = model.value
})
</script>

<template>
  <SpeechProviderSettings :provider-id="providerId" :default-model="defaultModel">
    <template #voice-settings>
      <div class="space-y-3">
        <Callout label="Model">
          <div>
            <p>Select the audio-capable model to use for speech generation</p>
          </div>
        </Callout>
        <div>
          <Select
            v-model="model"
            :options="modelOptions"
            :disabled="isLoadingModels || providerModels.length === 0"
            placeholder="Select a model..."
          />
        </div>
      </div>
    </template>

    <template #playground>
      <SpeechPlayground
        :available-voices="availableVoices"
        :generate-speech="generateSpeech"
        :api-key-configured="apiKeyConfigured"
        default-text="Hello! This is a test of OpenRouter Speech."
      />
    </template>
  </SpeechProviderSettings>
</template>

<route lang="yaml">
meta:
  layout: settings
  stageTransition:
    name: slide
</route>

packages/stage-ui/src/stores/modules/speech.ts

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -131,11 +131,16 @@ export const useSpeechStore = defineStore('speech', () => {
131131
watch(
132132
() => providersStore.configuredSpeechProvidersMetadata.map(provider => provider.id),
133133
(configuredProviderIds) => {
134-
if (!activeSpeechProvider.value)
134+
if (!activeSpeechProvider.value || activeSpeechProvider.value === 'speech-noop')
135+
return
136+
137+
// NOTICE: only reset when the provider has actually been validated and found unconfigured.
138+
// Skip reset if validation hasn't run yet (validatedCredentialHash is undefined)
139+
// to avoid a race condition where immediate watcher fires before async validation completes.
140+
const runtimeState = providersStore.providerRuntimeState[activeSpeechProvider.value]
141+
if (runtimeState && runtimeState.validatedCredentialHash === undefined)
135142
return
136143

137-
// NOTICE: clear stale selection when the currently selected speech provider
138-
// is no longer configured to avoid implicit fallback behavior from persisted state.
139144
if (!configuredProviderIds.includes(activeSpeechProvider.value)) {
140145
activeSpeechProvider.value = 'speech-noop'
141146
activeSpeechModel.value = ''

packages/stage-ui/src/stores/providers.ts

Lines changed: 204 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1350,6 +1350,209 @@ export const useProvidersStore = defineStore('providers', () => {
13501350
},
13511351
},
13521352
},
1353+
'openrouter-audio-speech': {
  id: 'openrouter-audio-speech',
  category: 'speech',
  tasks: ['text-to-speech'],
  nameKey: 'settings.pages.providers.provider.openrouter-audio-speech.title',
  name: 'OpenRouter',
  descriptionKey: 'settings.pages.providers.provider.openrouter-audio-speech.description',
  description: 'openrouter.ai',
  icon: 'i-lobe-icons:openrouter',
  defaultOptions: () => ({
    baseUrl: 'https://openrouter.ai/api/v1/',
  }),

  // OpenRouter exposes no dedicated /audio/speech endpoint: speech is produced
  // by audio-capable chat models. createProvider therefore installs a custom
  // `fetch` that rewrites the OpenAI-style speech request into a streaming
  // chat/completions call and reassembles the streamed base64 PCM16 deltas
  // into a playable WAV response.
  createProvider: async (config) => {
    const apiKey = typeof config.apiKey === 'string' ? config.apiKey.trim() : ''

    // Normalize the base URL: fall back to the public endpoint and guarantee a
    // trailing slash so `new URL('chat/completions', baseUrl)` resolves under
    // the /api/v1/ path instead of replacing it.
    let baseUrl = typeof config.baseUrl === 'string' ? config.baseUrl.trim() : 'https://openrouter.ai/api/v1/'
    if (!baseUrl)
      baseUrl = 'https://openrouter.ai/api/v1/'
    if (!baseUrl.endsWith('/'))
      baseUrl += '/'

    // Wrap raw PCM16 samples in a minimal 44-byte RIFF/WAV header so the
    // browser's audio stack can play the result directly.
    // NOTE(review): assumes 24 kHz mono 16-bit output — TODO confirm against
    // the sample rate actually emitted by OpenRouter audio models.
    const pcm16ToWav = (pcmBytes: Uint8Array): Uint8Array => {
      const sampleRate = 24000
      const numChannels = 1
      const bitsPerSample = 16
      const byteRate = sampleRate * numChannels * (bitsPerSample / 8)
      const blockAlign = numChannels * (bitsPerSample / 8)
      const wavHeader = new ArrayBuffer(44)
      const view = new DataView(wavHeader)
      const writeStr = (offset: number, str: string) => {
        for (let i = 0; i < str.length; i++)
          view.setUint8(offset + i, str.charCodeAt(i))
      }
      writeStr(0, 'RIFF')
      view.setUint32(4, 36 + pcmBytes.length, true) // RIFF chunk size
      writeStr(8, 'WAVE')
      writeStr(12, 'fmt ')
      view.setUint32(16, 16, true) // fmt sub-chunk size
      view.setUint16(20, 1, true) // audio format 1 = linear PCM
      view.setUint16(22, numChannels, true)
      view.setUint32(24, sampleRate, true)
      view.setUint32(28, byteRate, true)
      view.setUint16(32, blockAlign, true)
      view.setUint16(34, bitsPerSample, true)
      writeStr(36, 'data')
      view.setUint32(40, pcmBytes.length, true)

      const wavBytes = new Uint8Array(44 + pcmBytes.length)
      wavBytes.set(new Uint8Array(wavHeader), 0)
      wavBytes.set(pcmBytes, 44)
      return wavBytes
    }

    const provider: SpeechProvider = {
      speech: (model?: string) => ({
        baseURL: baseUrl,
        model: model || 'openai/gpt-audio-mini',
        fetch: async (_input: RequestInfo | URL, init?: RequestInit) => {
          if (!init?.body || typeof init.body !== 'string')
            throw new Error('Invalid request body')

          const body = JSON.parse(init.body)
          const text = body.input
          const voice = body.voice

          const sseResponse = await globalThis.fetch(new URL('chat/completions', baseUrl), {
            method: 'POST',
            headers: {
              'Authorization': `Bearer ${apiKey}`,
              'Content-Type': 'application/json',
            },
            body: JSON.stringify({
              model: model || 'openai/gpt-audio-mini',
              messages: [
                { role: 'user', content: `Read this text aloud exactly as written, without any commentary or extra words:\n\n${text}` },
              ],
              modalities: ['text', 'audio'],
              audio: { voice, format: 'pcm16' },
              stream: true,
            }),
          })

          if (!sseResponse.ok) {
            const errorText = await sseResponse.text()
            throw new Error(`OpenRouter audio request failed: ${sseResponse.status} ${errorText}`)
          }

          const reader = sseResponse.body!.getReader()
          const decoder = new TextDecoder()
          const audioDataChunks: string[] = []
          let buffer = ''
          let finished = false

          // Handle one SSE line: collect base64 audio deltas, flag [DONE].
          const consumeLine = (line: string) => {
            if (!line.startsWith('data: '))
              return
            const data = line.slice('data: '.length).trim()
            if (data === '[DONE]') {
              finished = true
              return
            }
            try {
              const chunk = JSON.parse(data)
              const audio = chunk.choices?.[0]?.delta?.audio
              if (audio?.data)
                audioDataChunks.push(audio.data)
            }
            catch {
              // skip malformed chunks
            }
          }

          try {
            while (!finished) {
              const { done, value } = await reader.read()
              if (done)
                break

              buffer += decoder.decode(value, { stream: true })
              const lines = buffer.split('\n')
              buffer = lines.pop()!

              for (const line of lines) {
                consumeLine(line)
                // BUGFIX: previously `[DONE]` only broke the inner for-loop,
                // so the outer while kept awaiting reads until the server
                // closed the stream. Now the read loop stops immediately.
                if (finished)
                  break
              }
            }

            // BUGFIX: flush the decoder and process a trailing line that had
            // no final newline; the previous implementation silently dropped
            // its audio chunk.
            buffer += decoder.decode()
            if (!finished && buffer)
              consumeLine(buffer)
          }
          finally {
            // BUGFIX: release the connection on early exit and error paths
            // instead of leaving the stream open.
            void reader.cancel().catch(() => {})
          }

          // Decode the accumulated base64 PCM16 payload.
          // BUGFIX: an empty payload used to yield a 44-byte unplayable WAV;
          // fail loudly instead so the UI can surface the problem.
          const fullBase64 = audioDataChunks.join('')
          if (!fullBase64)
            throw new Error('OpenRouter returned no audio data (the selected model may not support audio output)')

          const binaryString = atob(fullBase64)
          const pcmBytes = new Uint8Array(binaryString.length)
          for (let i = 0; i < binaryString.length; i++)
            pcmBytes[i] = binaryString.charCodeAt(i)

          const wavBytes = pcm16ToWav(pcmBytes)
          return new Response(wavBytes.buffer, {
            status: 200,
            headers: { 'Content-Type': 'audio/wav' },
          })
        },
      }),
    }
    return provider
  },
  capabilities: {
    // List OpenRouter models able to emit audio output.
    // NOTE(review): no Authorization header is sent — assumes the model
    // catalog endpoint is public; confirm before relying on key-scoped models.
    listModels: async (config: Record<string, unknown>) => {
      let baseUrl = typeof config.baseUrl === 'string' ? config.baseUrl.trim() : 'https://openrouter.ai/api/v1/'
      if (!baseUrl)
        baseUrl = 'https://openrouter.ai/api/v1/'
      if (!baseUrl.endsWith('/'))
        baseUrl += '/'

      try {
        const res = await fetch(`${baseUrl}models?output_modality=audio`)
        if (!res.ok)
          return []

        const json = await res.json()
        const models = json.data || []
        return models.map((m: any) => ({
          id: m.id,
          name: m.name || m.id,
          provider: 'openrouter-audio-speech',
          description: m.description || '',
          contextLength: m.context_length || 0,
          deprecated: false,
        } satisfies ModelInfo))
      }
      catch {
        // Best-effort: an unreachable catalog just yields an empty model list.
        return []
      }
    },
    listVoices: async () => {
      // OpenRouter audio models support standard OpenAI voices
      return [
        'alloy',
        'ash',
        'ballad',
        'coral',
        'echo',
        'fable',
        'onyx',
        'nova',
        'sage',
        'shimmer',
        'verse',
      ].map(id => ({
        id,
        name: id.charAt(0).toUpperCase() + id.slice(1),
        provider: 'openrouter-audio-speech',
        languages: [],
      } satisfies VoiceInfo))
    },
  },
  validators: {
    validateProviderConfig: (config) => {
      const errors: Error[] = []
      if (!config.apiKey)
        errors.push(new Error('API Key is required.'))

      // NOTE(review): when a baseUrl is present the shared validator's result
      // is returned as-is, which drops any apiKey error collected above —
      // presumably it only returns a result on failure; confirm its contract.
      if (config.baseUrl) {
        const res = baseUrlValidator.value(config.baseUrl)
        if (res)
          return res
      }

      return {
        errors,
        reason: errors.map(e => e.message).join(', '),
        valid: errors.length === 0,
      }
    },
  },
},
13531556
'comet-api-speech': buildOpenAICompatibleProvider({
13541557
id: 'comet-api-speech',
13551558
name: 'CometAPI Speech',
@@ -2198,6 +2401,7 @@ export const useProvidersStore = defineStore('providers', () => {
21982401
deleteProvider,
21992402
availableProviders,
22002403
configuredProviders,
2404+
providerRuntimeState,
22012405
providerMetadata,
22022406
getProviderMetadata,
22032407
getTranscriptionFeatures,

0 commit comments

Comments
 (0)