Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions packages/i18n/src/locales/en/settings.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -754,6 +754,9 @@ pages:
openrouter:
description: openrouter.ai
title: OpenRouter
openrouter-audio-speech:
description: openrouter.ai
title: OpenRouter
perplexity:
description: perplexity.ai
title: Perplexity
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
<script setup lang="ts">
import type { SpeechProvider } from '@xsai-ext/providers/utils'
import {
SpeechPlayground,
SpeechProviderSettings,
} from '@proj-airi/stage-ui/components'
import { useSpeechStore } from '@proj-airi/stage-ui/stores/modules/speech'
import { useProvidersStore } from '@proj-airi/stage-ui/stores/providers'
import { Callout, Select } from '@proj-airi/ui'
import { storeToRefs } from 'pinia'
import { computed, onMounted, watch } from 'vue'
// Pinia stores: speech-synthesis state and provider configuration/credentials.
const speechStore = useSpeechStore()
const providersStore = useProvidersStore()
const { providers } = storeToRefs(providersStore)

// Must match the provider id registered in the providers store.
const providerId = 'openrouter-audio-speech'
const defaultModel = 'openai/gpt-audio-mini'

// Two-way binding between the model <Select> and the persisted provider config;
// falls back to defaultModel while no model has been chosen yet.
const model = computed({
  get: () => providers.value[providerId]?.model as string | undefined || defaultModel,
  set: (value) => {
    // Lazily create the config record on first write.
    if (!providers.value[providerId])
      providers.value[providerId] = {}
    providers.value[providerId].model = value
  },
})

// Derived view state sourced from the providers and speech stores.
const providerModels = computed(() => providersStore.getModelsForProvider(providerId))
const isLoadingModels = computed(() => providersStore.isLoadingModels[providerId] || false)
const apiKeyConfigured = computed(() => !!providers.value[providerId]?.apiKey)
const availableVoices = computed(() => {
  return speechStore.availableVoices[providerId] || []
})

// Populate the model and voice lists when the page mounts.
onMounted(async () => {
  await providersStore.fetchModelsForProvider(providerId)
  await speechStore.loadVoicesForProvider(providerId)
})
async function handleGenerateSpeech(input: string, voiceId: string, _useSSML: boolean) {
const provider = await providersStore.getProviderInstance<SpeechProvider<string>>(providerId)
if (!provider)
throw new Error('Failed to initialize speech provider')
const modelToUse = model.value || defaultModel
return await speechStore.speech(
provider,
modelToUse,
input,
voiceId,
)
}
// Keep the providers-store config object in sync with the selected model.
// NOTE(review): the `model` computed setter above already writes
// providers[providerId].model — presumably getProviderConfig returns a distinct
// config object that also needs the value; confirm and deduplicate if not.
// Fixes: dropped the pointless `async` (no await inside) and use the watcher's
// new-value argument instead of re-reading model.value.
watch(model, (value) => {
  const providerConfig = providersStore.getProviderConfig(providerId)
  providerConfig.model = value
})
</script>

<template>
  <!-- Shared settings shell (API key, base URL, etc.) for this speech provider -->
  <SpeechProviderSettings :provider-id="providerId" :default-model="defaultModel">
    <!-- Model picker rendered inside the voice-settings slot -->
    <template #voice-settings>
      <div class="space-y-3">
        <Callout label="Model">
          <div>
            <p>Select the audio-capable model to use for speech generation</p>
          </div>
        </Callout>
        <div>
          <!-- Disabled until models have loaded; options come from the providers store -->
          <Select
            v-model="model"
            :options="providerModels.map(m => ({ value: m.id, label: m.name }))"
            :disabled="isLoadingModels || providerModels.length === 0"
            placeholder="Select a model..."
          />
        </div>
      </div>
    </template>

    <!-- Test area: pick a voice and synthesize sample audio -->
    <template #playground>
      <SpeechPlayground
        :available-voices="availableVoices"
        :generate-speech="handleGenerateSpeech"
        :api-key-configured="apiKeyConfigured"
        default-text="Hello! This is a test of OpenRouter Speech."
      />
    </template>
  </SpeechProviderSettings>
</template>

<route lang="yaml">
meta:
layout: settings
stageTransition:
name: slide
</route>
11 changes: 8 additions & 3 deletions packages/stage-ui/src/stores/modules/speech.ts
Original file line number Diff line number Diff line change
Expand Up @@ -131,11 +131,16 @@ export const useSpeechStore = defineStore('speech', () => {
watch(
() => providersStore.configuredSpeechProvidersMetadata.map(provider => provider.id),
(configuredProviderIds) => {
if (!activeSpeechProvider.value)
if (!activeSpeechProvider.value || activeSpeechProvider.value === 'speech-noop')
return

// NOTICE: only reset when the provider has actually been validated and found unconfigured.
// Skip reset if validation hasn't run yet (validatedCredentialHash is undefined)
// to avoid a race condition where immediate watcher fires before async validation completes.
const runtimeState = providersStore.providerRuntimeState[activeSpeechProvider.value]
if (runtimeState && runtimeState.validatedCredentialHash === undefined)
return

// NOTICE: clear stale selection when the currently selected speech provider
// is no longer configured to avoid implicit fallback behavior from persisted state.
if (!configuredProviderIds.includes(activeSpeechProvider.value)) {
activeSpeechProvider.value = 'speech-noop'
activeSpeechModel.value = ''
Expand Down
207 changes: 207 additions & 0 deletions packages/stage-ui/src/stores/providers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1350,6 +1350,212 @@ export const useProvidersStore = defineStore('providers', () => {
},
},
},
// Dedicated text-to-speech entry for OpenRouter, registered separately from the
// chat 'openrouter' provider so it carries its own credentials and model list.
'openrouter-audio-speech': {
  id: 'openrouter-audio-speech',
  category: 'speech',
  tasks: ['text-to-speech'],
  nameKey: 'settings.pages.providers.provider.openrouter-audio-speech.title',
  name: 'OpenRouter',
  descriptionKey: 'settings.pages.providers.provider.openrouter-audio-speech.description',
  description: 'openrouter.ai',
  icon: 'i-lobe-icons:openrouter',
  // Trailing slash matters: relative URLs below are resolved against this base.
  defaultOptions: () => ({
    baseUrl: 'https://openrouter.ai/api/v1/',
  }),
// Builds a SpeechProvider whose custom `fetch` adapts the xsAI speech call
// (an OpenAI-style /audio/speech POST) into an OpenRouter streaming
// chat-completions request with audio output, then returns the collected
// audio to the caller as a playable WAV Response.
createProvider: async (config) => {
  const apiKey = typeof config.apiKey === 'string' ? config.apiKey.trim() : ''
  // Normalize the base URL so `new URL('chat/completions', baseUrl)` resolves
  // under the API path rather than replacing its last segment.
  let baseUrl = (typeof config.baseUrl === 'string' && config.baseUrl.trim()) || 'https://openrouter.ai/api/v1/'
  if (!baseUrl.endsWith('/'))
    baseUrl += '/'

  const provider: SpeechProvider = {
    speech: (model?: string) => ({
      baseURL: baseUrl,
      model: model || 'openai/gpt-audio-mini',
      // Intercepting fetch: the original request URL (`_input`) is intentionally
      // ignored; only the serialized speech body is reused.
      fetch: async (_input: RequestInfo | URL, init?: RequestInit) => {
        if (!init?.body || typeof init.body !== 'string')
          throw new Error('Invalid request body')

        // The upstream speech client serializes `{ input, voice, ... }`.
        const body = JSON.parse(init.body)
        const text = body.input
        const voice = body.voice

        const sseResponse = await globalThis.fetch(new URL('chat/completions', baseUrl), {
          method: 'POST',
          headers: {
            'Authorization': `Bearer ${apiKey}`,
            'Content-Type': 'application/json',
          },
          body: JSON.stringify({
            model: model || 'openai/gpt-audio-mini',
            // Instruct the chat model to read the text verbatim instead of replying.
            messages: [
              { role: 'user', content: `Read this text aloud exactly as written, without any commentary or extra words:\n\n${text}` },
            ],
            modalities: ['text', 'audio'],
            audio: { voice, format: 'pcm16' },
            stream: true,
          }),
        })

        if (!sseResponse.ok) {
          const errorText = await sseResponse.text()
          throw new Error(`OpenRouter audio request failed: ${sseResponse.status} ${errorText}`)
        }

        // Read the SSE stream line by line, collecting base64 audio deltas.
        const reader = sseResponse.body!.getReader()
        const decoder = new TextDecoder()
        const audioDataChunks: string[] = []
        let buffer = ''

        while (true) {
          const { done, value } = await reader.read()
          if (done)
            break

          // Keep any trailing partial line in `buffer` until the next chunk arrives.
          buffer += decoder.decode(value, { stream: true })
          const lines = buffer.split('\n')
          buffer = lines.pop()!

          for (const line of lines) {
            if (!line.startsWith('data: '))
              continue
            const data = line.slice('data: '.length).trim()
            // NOTE(review): this `break` only exits the inner for-loop; reading
            // still ends via `done` on the outer loop, so it is harmless but does
            // not short-circuit the stream.
            if (data === '[DONE]')
              break

            try {
              const chunk = JSON.parse(data)
              const audio = chunk.choices?.[0]?.delta?.audio
              if (audio?.data)
                audioDataChunks.push(audio.data)
            }
            catch (e) {
              // skip malformed chunks, but log them for debugging
              console.warn('Skipping malformed SSE chunk from OpenRouter audio stream:', data, e)
            }
          }
        }

        // Decode base64 PCM16 data
        const fullBase64 = audioDataChunks.join('')
        const binaryString = atob(fullBase64)
        const pcmBytes = new Uint8Array(binaryString.length)
        for (let i = 0; i < binaryString.length; i++)
          pcmBytes[i] = binaryString.charCodeAt(i)

        // Wrap raw PCM16 in a WAV header so the browser can play it
        // NOTE(review): assumes 24 kHz mono 16-bit samples — confirm against the
        // model's actual pcm16 output; a mismatch plays at the wrong speed/pitch.
        const sampleRate = 24000
        const numChannels = 1
        const bitsPerSample = 16
        const byteRate = sampleRate * numChannels * (bitsPerSample / 8)
        const blockAlign = numChannels * (bitsPerSample / 8)
        const wavHeader = new ArrayBuffer(44)
        const view = new DataView(wavHeader)
        // Writes an ASCII tag (e.g. 'RIFF') into the header at `offset`.
        const writeStr = (offset: number, str: string) => {
          for (let i = 0; i < str.length; i++)
            view.setUint8(offset + i, str.charCodeAt(i))
        }

        // RIFF chunk descriptor
        writeStr(0, 'RIFF') // ChunkID
        view.setUint32(4, 36 + pcmBytes.length, true) // ChunkSize
        writeStr(8, 'WAVE') // Format

        // "fmt " sub-chunk
        writeStr(12, 'fmt ') // Subchunk1ID
        view.setUint32(16, 16, true) // Subchunk1Size (16 for PCM)
        view.setUint16(20, 1, true) // AudioFormat (1 for PCM)
        view.setUint16(22, numChannels, true) // NumChannels
        view.setUint32(24, sampleRate, true) // SampleRate
        view.setUint32(28, byteRate, true) // ByteRate
        view.setUint16(32, blockAlign, true) // BlockAlign
        view.setUint16(34, bitsPerSample, true) // BitsPerSample

        // "data" sub-chunk
        writeStr(36, 'data') // Subchunk2ID
        view.setUint32(40, pcmBytes.length, true) // Subchunk2Size

        // Concatenate header + samples into the final WAV payload.
        const wavBytes = new Uint8Array(44 + pcmBytes.length)
        wavBytes.set(new Uint8Array(wavHeader), 0)
        wavBytes.set(pcmBytes, 44)

        return new Response(wavBytes.buffer, {
          status: 200,
          headers: { 'Content-Type': 'audio/wav' },
        })
      },
    }),
  }
  return provider
},
capabilities: {
  // Query OpenRouter's public model list, filtered to audio-output models.
  // Returns [] on any network or HTTP failure rather than throwing.
  listModels: async (config: Record<string, unknown>) => {
    const configuredBase = typeof config.baseUrl === 'string' ? config.baseUrl.trim() : ''
    const base = configuredBase || 'https://openrouter.ai/api/v1/'
    const normalizedBase = base.endsWith('/') ? base : `${base}/`

    try {
      const response = await fetch(`${normalizedBase}models?output_modality=audio`)
      if (!response.ok)
        return []

      const payload = await response.json()
      const entries = payload.data || []
      return entries.map((entry: any) => ({
        id: entry.id,
        name: entry.name || entry.id,
        provider: 'openrouter-audio-speech',
        description: entry.description || '',
        contextLength: entry.context_length || 0,
        deprecated: false,
      } satisfies ModelInfo))
    }
    catch {
      return []
    }
  },
  // OpenRouter audio models support standard OpenAI voices
  listVoices: async () => {
    const voiceIds = [
      'alloy',
      'ash',
      'ballad',
      'coral',
      'echo',
      'fable',
      'onyx',
      'nova',
      'sage',
      'shimmer',
      'verse',
    ]
    // Display name is simply the capitalized voice id.
    return voiceIds.map(id => ({
      id,
      name: id.charAt(0).toUpperCase() + id.slice(1),
      provider: 'openrouter-audio-speech',
      languages: [],
    } satisfies VoiceInfo))
  },
},
validators: {
  // Validates the persisted provider config: API key is mandatory, base URL is
  // optional but must pass the shared baseUrlValidator when present.
  validateProviderConfig: (config) => {
    const errors: Error[] = []
    if (!config.apiKey)
      errors.push(new Error('API Key is required.'))

    // Fix: merge base-URL failures into `errors` instead of returning them
    // early — the original `return res` silently discarded the missing-API-key
    // error whenever the base URL was also invalid.
    if (config.baseUrl) {
      const res = baseUrlValidator.value(config.baseUrl)
      if (res)
        errors.push(...res.errors)
    }

    return {
      errors,
      reason: errors.map(e => e.message).join(', '),
      valid: errors.length === 0,
    }
  },
},
},
'comet-api-speech': buildOpenAICompatibleProvider({
id: 'comet-api-speech',
name: 'CometAPI Speech',
Expand Down Expand Up @@ -2198,6 +2404,7 @@ export const useProvidersStore = defineStore('providers', () => {
deleteProvider,
availableProviders,
configuredProviders,
providerRuntimeState,
providerMetadata,
getProviderMetadata,
getTranscriptionFeatures,
Expand Down