
Commit 2e88db3

feat(ai-proxy): add new routes for 3.13 (#3554)
1 parent 7548dd2 commit 2e88db3

8 files changed: +337 −59 lines changed

app/_data/plugins/ai-proxy.yaml

Lines changed: 206 additions & 33 deletions
@@ -5,39 +5,65 @@ providers:
     chat:
       supported: true
       streaming: true
-      upstream_path: 'Use the LLM <code>chat</code> upstream path'
+      upstream_path: 'Uses the <code>Converse</code> and <code>ConverseStream</code> API'
       route_type: 'llm/v1/chat'
       model_example: '<a href="https://docs.aws.amazon.com/bedrock/latest/userguide/model-ids.html">Use the model name for the specific LLM provider</a>'
       min_version: '3.8'
     completions:
       supported: true
       streaming: true
-      upstream_path: 'Use the LLM <code>completions</code> upstream path'
+      upstream_path: 'Uses the <code>Converse</code> and <code>ConverseStream</code> API'
       route_type: 'llm/v1/completions'
       model_example: '<a href="https://docs.aws.amazon.com/bedrock/latest/userguide/model-ids.html">Use the model name for the specific LLM provider</a>'
       min_version: '3.8'
     embeddings:
       supported: true
       streaming: false
-      upstream_path: 'Use the LLM <code>embeddings</code> upstream path'
+      upstream_path: 'Uses the <code>InvokeModel</code> and <code>InvokeWithResponseStream</code> API'
       route_type: 'llm/v1/embeddings'
       model_example: '<a href="https://docs.aws.amazon.com/bedrock/latest/userguide/model-ids.html">Use the model name for the specific LLM provider</a>'
       min_version: '3.11'
+    batches: # Native format from SDK only
+      supported: 'n/a'
+      streaming: false
+      upstream_path: 'Uses the <code>ModelInvocationJob</code> API'
+      route_type: 'llm/v1/batches'
+      model_example: 'n/a'
+      min_version: ''
+      note:
+        content: 'Batches processing for Bedrock is supported in the native format from SDK only'
+    files:
+      supported: 'n/a'
+      streaming: false
+      upstream_path: '/openai/files'
+      route_type: 'llm/v1/files'
+      model_example: 'n/a'
+      min_version: ''
+      note:
+        content: 'Bedrock does not have a dedicated Files API. File storage uses Amazon S3.'
     image:
       generations:
         supported: true
         streaming: false
-        upstream_path: 'Use the LLM <code>image/generations</code> upstream path'
+        upstream_path: 'Uses the <code>InvokeModel</code> API'
         route_type: 'image/v1/images/generations'
         model_example: '<a href="https://docs.aws.amazon.com/bedrock/latest/userguide/model-ids.html">Use the model name for the specific LLM provider</a>'
         min_version: '3.11'
       edits:
         supported: true
         streaming: false
-        upstream_path: 'Use the LLM <code>image/edits</code> upstream path'
+        upstream_path: 'Uses the <code>InvokeModel</code> API'
         route_type: 'image/v1/images/edits'
         model_example: '<a href="https://docs.aws.amazon.com/bedrock/latest/userguide/model-ids.html">Use the model name for the specific LLM provider</a>'
         min_version: '3.11'
+    video:
+      generations:
+        supported: true
+        streaming: false
+        upstream_path: 'Uses the <code>StartAsyncInvoke</code> API'
+        route_type: 'video/v1/videos/generations'
+        model_example: '<a href="https://docs.aws.amazon.com/bedrock/latest/userguide/model-ids.html">Use the model name for the specific LLM provider</a>'
+        min_version: '3.13'
 
   - name: 'Anthropic'
     url_pattern: 'https://api.anthropic.com:443/{route_type_path}'
@@ -56,6 +82,15 @@ providers:
       route_type: 'llm/v1/completions'
       model_example: 'claude-2.1'
       min_version: '3.6'
+    batches: # Native format from SDK only
+      supported: 'n/a'
+      streaming: true
+      upstream_path: '/v1/messages/batches'
+      route_type: 'files/v1/batches'
+      model_example: 'n/a'
+      min_version: ''
+      note:
+        content: 'Batches processing for Anthropic is supported in the native format from SDK only'
 
   - name: 'Azure'
     url_pattern: 'https://{azure_instance}.openai.azure.com:443/openai/deployments/{deployment_name}/{route_type_path}'
@@ -153,6 +188,25 @@ providers:
       route_type: 'realtime/v1/realtime'
       model_example: 'n/a'
       min_version: '3.11'
+    video:
+      generations:
+        supported: true
+        streaming: false
+        upstream_path: '/openai/v1/video/generations/jobs'
+        route_type: 'video/v1/videos/generations'
+        model_example: 'sora-2'
+        min_version: '3.13'
+
+  - name: 'Cerebras'
+    url_pattern: 'https://api.cerebras.ai/{route_type_path}'
+    min_version: '3.13'
+    chat:
+      supported: true
+      streaming: true
+      upstream_path: '/v1/chat/completions'
+      route_type: 'llm/v1/chat'
+      model_example: 'llama3.1-8b'
+      min_version: '3.13'
 
   - name: 'Cohere'
     url_pattern: 'https://api.cohere.com:443/{route_type_path}'
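
The Cerebras entry above is new in 3.13 and maps {route_type_path} onto /v1/chat/completions. As a rough, illustrative sketch (not part of this commit), a declarative route using that provider might look like the following; the config shape follows the plugin's usual auth/model fields, and the lowercase provider value cerebras plus the service and route names are assumptions:

_format_version: "3.0"
services:
  - name: cerebras-chat-service
    url: http://localhost:9999            # placeholder upstream; ai-proxy sets the real target
    routes:
      - name: cerebras-chat
        paths:
          - /cerebras/chat
        plugins:
          - name: ai-proxy
            config:
              route_type: llm/v1/chat
              auth:
                header_name: Authorization
                header_value: "Bearer <CEREBRAS_API_KEY>"
              model:
                provider: cerebras        # assumed enum value for the new provider
                name: llama3.1-8b

Requests sent to /cerebras/chat in the OpenAI chat format would then be forwarded to https://api.cerebras.ai/v1/chat/completions, per the url_pattern and upstream_path recorded above.
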
@@ -179,104 +233,196 @@ providers:
       model_example: 'embed-english-v3.0'
       min_version: '3.11'
 
+  - name: 'Dashscope'
+    url_pattern: 'https://dashscope.aliyuncs.com or https://dashscope-intl.aliyuncs.com'
+    min_version: '3.13'
+    chat:
+      supported: true
+      streaming: true
+      upstream_path: '/compatible-mode/v1/chat/completions'
+      route_type: 'llm/v1/chat'
+      model_example: 'qwen-plus'
+      min_version: '3.13'
+    embeddings:
+      supported: true
+      streaming: false
+      upstream_path: '/compatible-mode/v1/embeddings'
+      route_type: 'llm/v1/embeddings'
+      model_example: 'text-embedding-v1'
+      min_version: '3.13'
+    image:
+      generations:
+        supported: true
+        streaming: false
+        upstream_path: '/api/v1/services/aigc/multimodal-generation/generation'
+        route_type: 'image/v1/images/generations'
+        model_example: 'qwen-image-plus'
+        min_version: '3.13'
+      edits:
+        supported: true
+        streaming: false
+        upstream_path: '/api/v1/services/aigc/image2image/image-synthesis'
+        route_type: 'image/v1/images/edits'
+        model_example: 'qwen-image-plus'
+        min_version: '3.13'
+
   - name: 'Gemini'
     url_pattern: 'https://generativelanguage.googleapis.com'
     min_version: '3.8'
     chat:
       supported: true
       streaming: true
-      upstream_path: 'llm/v1/chat'
+      upstream_path: 'Uses <code>generateContent</code> API'
       route_type: 'llm/v1/chat'
       model_example: 'gemini-2.0-flash'
       min_version: '3.8'
-
     embeddings:
       supported: true
       streaming: false
-      upstream_path: 'llm/v1/embeddings'
+      upstream_path: 'Uses <code>batchEmbedContents</code> API'
       route_type: 'llm/v1/embeddings'
       model_example: 'text-embedding-004'
       min_version: '3.11'
+    files: # Native format from SDK only
+      supported: 'n/a'
+      streaming: false
+      upstream_path: 'Uses <code>uploadFile</code> and <code>files</code> API'
+      route_type: 'llm/v1/files'
+      model_example: 'n/a'
+      min_version: ''
+      note:
+        content: 'Files processing for Gemini is supported in the native format from SDK only'
+    batches: # Native format from SDK only
+      supported: 'n/a'
+      streaming: false
+      upstream_path: 'Uses <code>batches</code> API'
+      route_type: 'llm/v1/batches'
+      model_example: 'n/a'
+      min_version: ''
+      note:
+        content: 'Batches processing for Gemini is supported in the native format from SDK only'
     image:
       generations:
         supported: true
         streaming: false
-        upstream_path: 'image/v1/images/generations'
+        upstream_path: 'Uses <code>generateContent</code> API'
         route_type: 'image/v1/images/generations'
         model_example: 'gemini-2.0-flash-preview-image-generation<sup>1</sup>'
         min_version: '3.11'
       edits:
         supported: true
         streaming: false
-        upstream_path: 'image/v1/images/edits'
+        upstream_path: 'Uses <code>generateContent</code> API'
         route_type: 'image/v1/images/edits'
         model_example: 'gemini-2.0-flash-preview-image-generation<sup>1</sup>'
         min_version: '3.11'
+    realtime: # Native format from SDK only
+      supported: true
+      streaming: true
+      upstream_path: 'Uses <code>BidiGenerateContent</code> API'
+      route_type: 'realtime/v1/realtime'
+      model_example: 'gemini-live-2.5-flash-preview-native-audio-09-2025'
+      min_version: '3.13'
+      note:
+        content: 'Realtime processing for Gemini is supported in the native format from SDK only'
+    video:
+      generations:
+        supported: true
+        streaming: false
+        upstream_path: 'Uses <code>predictLongRunning</code> API'
+        route_type: 'video/v1/videos/generations'
+        model_example: 'veo-3.1-generate-001'
+        min_version: '3.13'
 
   - name: 'Gemini Vertex'
     url_pattern: 'https://aiplatform.googleapis.com/'
     min_version: '3.11'
     chat:
       supported: true
       streaming: true
-      upstream_path: 'llm/v1/chat'
+      upstream_path: 'Uses <code>generateContent</code> API'
       route_type: 'llm/v1/chat'
       model_example: 'gemini-2.0-flash'
       min_version: '3.8'
     completions:
       supported: true
       streaming: false
-      upstream_path: 'llm/v1/completions'
+      upstream_path: 'Uses <code>generateContent</code> API'
       route_type: 'llm/v1/completions'
       model_example: 'gemini-2.0-flash'
       min_version: '3.8'
     embeddings:
       supported: true
       streaming: false
-      upstream_path: 'llm/v1/embeddings'
+      upstream_path: 'Uses <code>generateContent</code> API'
       route_type: 'llm/v1/embeddings'
       model_example: 'text-embedding-004'
       min_version: '3.11'
+    files:
+      supported: 'n/a'
+      streaming: false
+      upstream_path: '/openai/files'
+      route_type: 'llm/v1/files'
+      model_example: 'n/a'
+      min_version: '3.11'
+      note:
+        content: 'Gemini Vertex does not have a dedicated Files API. File storage uses Google Cloud Storage, similar to AWS S3.'
+    batches:
+      supported: true
+      streaming: false
+      upstream_path: 'Uses <code>batchPredictionJobs</code> API'
+      route_type: 'llm/v1/batches'
+      model_example: 'n/a'
+      min_version: '3.13'
     image:
       generations:
         supported: true
         streaming: false
-        upstream_path: 'image/v1/images/generations'
+        upstream_path: 'Uses <code>generateContent</code> API'
         route_type: 'image/v1/images/generations'
         model_example: 'gemini-2.0-flash-preview-image-generation<sup>1</sup>'
         min_version: '3.11'
       edits:
         supported: true
         streaming: false
-        upstream_path: 'image/v1/images/edits'
+        upstream_path: 'Uses <code>generateContent</code> API'
         route_type: 'image/v1/images/edits'
         model_example: 'gemini-2.0-flash-preview-image-generation<sup>1</sup>'
         min_version: '3.11'
+    video:
+      generations:
+        supported: true
+        streaming: false
+        upstream_path: 'Uses <code>predictLongRunning</code> API'
+        route_type: 'video/v1/videos/generations'
+        model_example: 'veo-3.1-generate-001'
+        min_version: '3.13'
 
   - name: 'Hugging Face'
     url_pattern: 'https://api-inference.huggingface.co'
     min_version: '3.9'
     chat:
       supported: true
       streaming: true
-      upstream_path: '/models/{model_provider}/{model_name}'
+      upstream_path: '/v1/chat/completions'
       route_type: 'llm/v1/chat'
       model_example: '<a href="https://huggingface.co/models?inference=warm&pipeline_tag=text-generation&sort=trending">Use the model name for the specific LLM provider</a>'
       min_version: '3.9'
-    completions:
-      supported: true
-      streaming: true
-      upstream_path: '/models/{model_provider}/{model_name}'
-      route_type: 'llm/v1/completions'
-      model_example: '<a href="https://huggingface.co/models?inference=warm&pipeline_tag=text-generation&sort=trending">Use the model name for the specific LLM provider</a>'
-      min_version: '3.9'
     embeddings:
       supported: true
       streaming: false
-      upstream_path: '/models/{model_provider}/{model_name}'
+      upstream_path: '/hf-inference/models/{model_name}/pipeline/feature-extraction'
       route_type: 'llm/v1/embeddings'
       model_example: '<a href="https://huggingface.co/models?pipeline_tag=feature-extraction">Use the embedding model name</a>'
       min_version: '3.11'
+    video:
+      generations:
+        supported: true
+        streaming: false
+        upstream_path: '/v1/videos'
+        route_type: 'video/v1/videos/generations'
+        model_example: '<a href="https://huggingface.co/models?pipeline_tag=video-generation">Use the video generation model name</a>'
+        min_version: '3.13'
 
   - name: 'Llama2'
     formats: 'supports Llama2 and Llama3 models and raw, OLLAMA, and OpenAI formats'
@@ -311,21 +457,14 @@ providers:
     chat:
       supported: true
       streaming: true
-      upstream_path: 'User-defined'
+      upstream_path: '/v1/chat/completions or user-defined'
       route_type: 'llm/v1/chat'
       model_example: 'mistral-tiny'
       min_version: '3.6'
-    completions:
-      supported: true
-      streaming: true
-      upstream_path: 'User-defined'
-      route_type: 'llm/v1/completions'
-      model_example: 'mistral-tiny'
-      min_version: '3.6'
     embeddings:
       supported: true
       streaming: false
-      upstream_path: 'User-defined'
+      upstream_path: '/v1/embeddings or user-defined'
       route_type: 'llm/v1/embeddings'
       model_example: 'mistral-embed'
       min_version: '3.11'
@@ -427,6 +566,40 @@ providers:
       route_type: 'realtime/v1/realtime'
       model_example: 'gpt-4o'
       min_version: '3.11'
+    video:
+      generations:
+        supported: true
+        streaming: false
+        upstream_path: 'Use the LLM <code>image/generations</code> upstream path'
+        route_type: 'video/v1/videos/generations'
+        model_example: 'sora-2'
+        min_version: '3.13'
+
+  - name: 'xAI'
+    url_pattern: 'https://api.x.ai/{route_type_path}'
+    min_version: '3.13'
+    chat:
+      supported: true
+      streaming: true
+      upstream_path: '/v1/chat/completions'
+      route_type: 'llm/v1/chat'
+      model_example: 'grok-4-1-fast-reasoning'
+      min_version: '3.13'
+    responses:
+      supported: true
+      streaming: false
+      upstream_path: '/v1/responses'
+      route_type: 'llm/v1/responses'
+      model_example: 'grok-4-1-fast-reasoning'
+      min_version: '3.13'
+    image:
+      generations:
+        supported: true
+        streaming: false
+        upstream_path: '/v1/images/generations'
+        route_type: 'image/v1/images/generations'
+        model_example: 'grok-2-image-1212'
+        min_version: '3.13'
 
   - name: 'xAI'
     url_pattern: 'https://api.x.ai:443/{route_type_path}'
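
For context (not part of the diff): the new video/v1/videos/generations route type that appears throughout this file would be enabled per route like any other route type. Below is a minimal sketch against the OpenAI provider, with the field names assumed from the plugin's existing schema and the route name hypothetical:

plugins:
  - name: ai-proxy
    route: video-generations              # hypothetical pre-existing route
    config:
      route_type: video/v1/videos/generations
      auth:
        header_name: Authorization
        header_value: "Bearer <OPENAI_API_KEY>"
      model:
        provider: openai
        name: sora-2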
