
Commit bd3345b

Update config examples

Signed-off-by: Michael Yuan <[email protected]>

1 parent 42325a4 commit bd3345b

7 files changed: +68 -20 lines changed

doc/docs/config/asr.md (11 additions, 0 deletions)

@@ -6,15 +6,23 @@ sidebar_position: 2
 
 The EchoKit server supports popular ASR providers.
 
+| Platform | URL example | Notes |
+| ------------- | ------------- | ---- |
+| `openai` | `https://api.openai.com/v1/audio/transcriptions` | Supports endpoint URLs from any OpenAI-compatible service, such as Groq and Open Router. |
+| `paraformer_v2` | `wss://dashscope.aliyuncs.com/api-ws/v1/inference` | A WebSocket streaming ASR service endpoint supported by Ali Cloud. |
+
 
 ## OpenAI and compatible services
 
 The OpenAI `/v1/audio/transcriptions` API is supported by OpenAI, Open Router, Groq, Azure, AWS and many other providers.
+This is a non-streaming service endpoint, meaning that the EchoKit server must determine when the user is done
+talking (via a VAD service), and then submit the entire audio to get a transcription.
 
 OpenAI example
 
 ```toml
 [asr]
+platform = "openai"
 url = "https://api.openai.com/v1/audio/transcriptions"
 api_key = "sk_ABCD"
 model = "gpt-4o-mini-transcribe"
@@ -26,6 +34,7 @@ Groq example
 
 ```toml
 [asr]
+platform = "openai"
 url = "https://api.groq.com/openai/v1/audio/transcriptions"
 api_key = "gsk_ABCD"
 model = "whisper-large-v3"
@@ -44,6 +53,8 @@ send back text and voice activity events as they happen. There is no need to a s
 
 ```toml
 [asr]
+platform = "paraformer_v2"
+url = "wss://dashscope.aliyuncs.com/api-ws/v1/inference"
 paraformer_token = "sk-API-KEY"
 ```
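To illustrate the claim that the `openai` platform accepts endpoint URLs from any OpenAI-compatible service, here is a minimal sketch that points the `[asr]` section at a hypothetical self-hosted transcription server. The `localhost` URL, API key, and model name are placeholders, not values from this commit.

```toml
# Hypothetical self-hosted, OpenAI-compatible transcription server
# (URL, key, and model below are placeholders)
[asr]
platform = "openai"
url = "http://localhost:8000/v1/audio/transcriptions"
api_key = "EMPTY"
model = "whisper-large-v3"
```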

doc/docs/config/gemini-live.md (1 addition, 1 deletion)

@@ -65,7 +65,7 @@ hello_wav = "hello.wav"
 api_key = "your_api_key_here"
 
 [tts]
-platform = "StreamGSV"
+platform = "stream_gsv"
 url = "http://localhost:9094/v1/audio/stream_speech"
 speaker = "cooper"

doc/docs/config/intro.md (13 additions, 5 deletions)

@@ -45,6 +45,12 @@ The rest of the `config.toml` specifies how to use different AI services. Each s
 * The `[llm]` section configures the [large language model](llm.md) services, including [tools](llm-tools.md) and [MCP actions](mcp.md).
 * The `[tts]` section configures the [text-to-voice](tts.md) services.
 
+It is important to note that each of these sections has the following fields.
+
+* A `platform` field that designates the service protocol. A common example is `openai` for OpenAI-compatible API endpoints.
+* A `url` field for the service endpoint URL. It is typically an `https://` or `wss://` URL. The latter is the WebSocket address for streaming services.
+* Optional fields that are specific to the `platform`. These include `api_key`, `model`, and others.
+
 ## Complete Configuration Example
 
 You will need a free [API key from Groq](https://console.groq.com/keys).
@@ -54,23 +60,25 @@ You will need a free [API key from Groq](https://console.groq.com/keys).
 addr = "0.0.0.0:8080"
 hello_wav = "hello.wav"
 
-# Speech recognition
+# Speech recognition using the OpenAI transcriptions API, but hosted by Groq (instead of OpenAI)
 [asr]
+platform = "openai"
 url = "https://api.groq.com/openai/v1/audio/transcriptions"
 lang = "en"
 api_key = "gsk_your_api_key_here"
 model = "whisper-large-v3-turbo"
 
-# Language model
+# Language model using the OpenAI chat completions API, but hosted by Groq (instead of OpenAI)
 [llm]
-llm_chat_url = "https://api.groq.com/openai/v1/chat/completions"
+platform = "openai_chat"
+url = "https://api.groq.com/openai/v1/chat/completions"
 api_key = "gsk_your_api_key_here"
 model = "gpt-oss-20b"
 history = 10
 
-# Text-to-speech
+# Text-to-speech using the OpenAI speech API, but hosted by Groq (instead of OpenAI)
 [tts]
-platform = "Groq"
+platform = "openai"
 url = "https://api.groq.com/openai/v1/audio/speech"
 api_key = "gsk_your_api_key_here"
 model = "playai-tts"

doc/docs/config/llm-tools.md (10 additions, 5 deletions)

@@ -15,7 +15,8 @@ Since it is a stateful API, the EchoKit server only needs to send the last user
 
 ```toml
 [llm]
-llm_chat_url = "https://api.openai.com/v1/responses"
+platform = "openai_responses"
+url = "https://api.openai.com/v1/responses"
 api_key = "sk_ABCD"
 model = "gpt-5-nano"
 
@@ -43,7 +44,8 @@ The actual implementation of the `web_search_preview` tool is provided by OpenAI
 
 ```toml
 [llm]
-llm_chat_url = "https://api.openai.com/v1/responses"
+platform = "openai_responses"
+url = "https://api.openai.com/v1/responses"
 api_key = "sk_ABCD"
 model = "gpt-5-nano"
 
@@ -69,7 +71,8 @@ provides a `x_search` tool to specifically search for posts in x.com.
 
 ```toml
 [llm]
-llm_chat_url = "https://api.x.ai/v1/responses"
+platform = "openai_responses"
+url = "https://api.x.ai/v1/responses"
 api_key = "xai_ABCD"
 model = "grok-4-1-fast-non-reasoning"
 
@@ -95,7 +98,8 @@ Again the name of the build-in search tool is different. It is called `browser_s
 
 ```toml
 [llm]
-llm_chat_url = "https://api.groq.com/openai/v1/chat/responses"
+platform = "openai_responses"
+url = "https://api.groq.com/openai/v1/chat/responses"
 api_key = "gsk_ABCD"
 model = "openai/gpt-oss-20b"
 
@@ -127,7 +131,8 @@ a response based on those tool call results.
 
 ```toml
 [llm]
-llm_chat_url = "https://api.x.ai/v1/responses"
+platform = "openai_responses"
+url = "https://api.x.ai/v1/responses"
 api_key = "xai_ABCD"
 model = "grok-4-1-fast-non-reasoning"

doc/docs/config/llm.md (12 additions, 4 deletions)

@@ -5,15 +5,22 @@ sidebar_position: 3
 # LLM services
 
 The EchoKit server utilizes LLM services to generate responses to user queries.
-Most popular LLM services support OpenAI's `/v1/chat/completions` API.
+Most popular LLM services support the OpenAI API.
+
+| Platform | URL example | Notes |
+| ------------- | ------------- | ---- |
+| `openai_chat` | `https://api.openai.com/v1/chat/completions` | The stateless `/chat/completions` API. It is the most widely supported LLM API. |
+| `openai_responses` | `https://api.openai.com/v1/responses` | The stateful `/responses` API. Alpha feature. |
+
 
 ## Simple example
 
 The following example configures the EchoKit server to use the OpenAI LLM service.
 
 ```toml
 [llm]
-llm_chat_url = "https://api.openai.com/v1/chat/completions"
+platform = "openai_chat"
+url = "https://api.openai.com/v1/chat/completions"
 api_key = "sk_ABCD"
 model = "gpt-5-nano"
 history = 5
@@ -78,7 +85,8 @@ We also tells the LLM to use the search tool when needed in the system prompt.
 
 ```toml
 [llm]
-llm_chat_url = "https://dashscope.aliyuncs.com/compatible-mode/v1/chat/completions"
+platform = "openai_chat"
+url = "https://dashscope.aliyuncs.com/compatible-mode/v1/chat/completions"
 api_key = "sk-API-KEY"
 model = "qwen-plus"
 history = 5
@@ -105,6 +113,6 @@ You can pass any JSON parameter supported by the LLM API provider in the `[llm.e
 
 While the stateless `/v1/chat/completions` API is widely supported,
 OpenAI and many providers in the ecosystem have shifted their focus to the new stateful
-`/v1/responses` API. The new responses API makes it easier to support tools, icnluding web searches,
+`/v1/responses` API. The new responses API makes it easier to support tools, including web searches,
 in LLM applications.
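For comparison with the `openai_chat` example above, a minimal `[llm]` sketch for the stateful `openai_responses` platform, mirroring the responses-API examples in doc/docs/config/llm-tools.md from this same commit (the API key is a placeholder), could look like this:

```toml
# Stateful /v1/responses variant of the [llm] section (placeholder API key)
[llm]
platform = "openai_responses"
url = "https://api.openai.com/v1/responses"
api_key = "sk_ABCD"
model = "gpt-5-nano"
```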

doc/docs/config/tts.md (19 additions, 5 deletions)

@@ -9,6 +9,16 @@ For interactive applications, you should select a TTS service that supports stre
 Streaming allows the TTS to "speak" as the LLM returns text, instead of waiting for the LLM
 to complete and then for the TTS to synthesize the whole text.
 
+
+| Platform | URL example | Notes |
+| ------------- | ------------- | ---- |
+| `openai` | `https://api.openai.com/v1/audio/speech` | Supports endpoint URLs from any OpenAI-compatible service, such as Groq and Open Router. |
+| `elevenlabs` | `wss://api.elevenlabs.io/v1/text-to-speech` | Supports the ElevenLabs TTS endpoint URL. |
+| `fish` | `https://api.fish.audio/v1/tts` | Supports the Fish Audio TTS endpoint URL. |
+| `stream_gsv` | `http://localhost:9094/v1/audio/stream_speech` | Supports a self-hosted GPT-SoVITS model API server. This is a streaming TTS endpoint. |
+| `gsv` | `http://localhost:9094/v1/audio/speech` | Supports a self-hosted GPT-SoVITS model API server. |
+| `cosyvoice` | `wss://dashscope.aliyuncs.com/api-ws/v1/inference` | A WebSocket streaming TTS service endpoint supported by Ali Cloud. |
+
 ## ElevenLabs streaming service
 
 ElevenLabs provide state-of-the-art TTS models for many languages. It also provides a large library
@@ -20,7 +30,8 @@ With an [API key from ElevenLabs](https://elevenlabs.io/app/developers/api-keys)
 
 ```toml
 [tts]
-platform = "Elevenlabs"
+platform = "elevenlabs"
+url = "wss://api.elevenlabs.io/v1/text-to-speech/"
 token = "sk_1234"
 voice = "YOUR-VOICE-ID"
 ```
@@ -38,7 +49,7 @@ The example below shows a streaming GTP-SoVITS server running at local host port
 
 ```toml
 [tts]
-platform = "StreamGSV"
+platform = "stream_gsv"
 url = "http://localhost:9094/v1/audio/stream_speech"
 speaker = "texan"
 ```
@@ -49,7 +60,8 @@ The [CosyVoice service](https://bailian.console.aliyun.com/) from Ali Cloud is a
 
 ```toml
 [tts]
-platform = "CosyVoice"
+platform = "cosyvoice"
+url = "wss://dashscope.aliyuncs.com/api-ws/v1/inference"
 token = "sk-API-KEY"
 speaker = "longhua_v2"
 ```
@@ -63,7 +75,8 @@ OpenAI example
 
 ```toml
 [tts]
-platform = "OpenAI"
+platform = "openai"
+url = "https://api.openai.com/v1/audio/speech"
 model = "gpt-4o-mini-tts"
 api_key = "sk_ABCD"
 voice = "ash"
@@ -73,7 +86,8 @@ Groq example
 
 ```toml
 [tts]
-platform = "Groq"
+platform = "openai"
+url = "https://api.groq.com/openai/v1/audio/speech"
 model = "playai-tts"
 api_key = "gsk_ABCD"
 voice = "Fritz-PlayAI"

doc/docs/get-started/echokit-server.md (2 additions, 0 deletions)

@@ -19,6 +19,8 @@ docker run --rm \
 The required `config.toml` file for the local EchoKit server could be the following. You will need
 free [Groq](https://console.groq.com/keys) and [ElevenLabs](https://elevenlabs.io/app/settings/api-keys) API keys.
 
+> The `platform = "openai"` in the configuration refers to OpenAI-compatible service endpoints. Groq provides its inference service in the OpenAI protocol.
+
 ```
 addr = "0.0.0.0:8080"
 hello_wav = "hello.wav"
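The config block in this hunk is shown only through `hello_wav`. A sketch of the Groq and ElevenLabs sections it refers to, assembled from the `[asr]`, `[llm]`, and `[tts]` examples elsewhere in this commit (keys and voice ID are placeholders, not the actual file contents), could be:

```toml
# Sketch assembled from examples in this commit; credentials and voice ID are placeholders
[asr]
platform = "openai"
url = "https://api.groq.com/openai/v1/audio/transcriptions"
api_key = "gsk_your_api_key_here"
model = "whisper-large-v3-turbo"

[llm]
platform = "openai_chat"
url = "https://api.groq.com/openai/v1/chat/completions"
api_key = "gsk_your_api_key_here"
model = "gpt-oss-20b"
history = 10

[tts]
platform = "elevenlabs"
url = "wss://api.elevenlabs.io/v1/text-to-speech/"
token = "sk_1234"
voice = "YOUR-VOICE-ID"
```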
