@@ -32,7 +32,7 @@ backendTrafficPolicy:
3232backendDefaults :
3333 fireworks : &fireworksBackend
3434 schema : OpenAI
35- prefix : /inference/v1
35+ prefix : " /inference/v1"
3636 fqdn :
3737 hostname : api.fireworks.ai
3838 port : 443
@@ -213,7 +213,7 @@ backends:
213213 name : google-ai-studio-api-key-02
214214
215215models :
216- # Fireworks AI Models
216+ # Fireworks AI Models
217217 # qwen3-coder-480b-a35b-instruct:
218218 # <<: *fireworksModelRateLimits
219219 # backends:
@@ -343,16 +343,16 @@ models:
343343 << : *fwBackendSecondary
344344 modelNameOverride : " accounts/fireworks/models/qwen3-235b-a22b"
345345
346- glm-4p6 :
347- << : *fireworksModelRateLimits
348- envoyTokenRateLimits : *tl1500_35m_420m
349- backends :
350- fw-01 :
351- << : *fwBackendPrimary
352- modelNameOverride : " accounts/fireworks/models/glm-4p6"
353- fw-02 :
354- << : *fwBackendSecondary
355- modelNameOverride : " accounts/fireworks/models/glm-4p6"
346+ # glm-4p6:
347+ # <<: *fireworksModelRateLimits
348+ # envoyTokenRateLimits: *tl1500_35m_420m
349+ # backends:
350+ # fw-01:
351+ # <<: *fwBackendPrimary
352+ # modelNameOverride: "accounts/fireworks/models/glm-4p6"
353+ # fw-02:
354+ # <<: *fwBackendSecondary
355+ # modelNameOverride: "accounts/fireworks/models/glm-4p6"
356356
357357 # glm-4p5:
358358 # <<: *fireworksModelRateLimits
@@ -364,6 +364,116 @@ models:
364364 # <<: *fwBackendSecondary
365365 # modelNameOverride: "accounts/fireworks/models/glm-4p5"
366366
367+ glm-5 :
368+ << : *fireworksModelRateLimits
369+ envoyTokenRateLimits : *tl1500_35m_420m
370+ backends :
371+ fw-01 :
372+ << : *fwBackendPrimary
373+ modelNameOverride : " accounts/fireworks/models/glm-5"
374+ fw-02 :
375+ << : *fwBackendSecondary
376+ modelNameOverride : " accounts/fireworks/models/glm-5"
377+
378+ qwen3-reranker-8b :
379+ << : *fireworksModelRateLimits
380+ envoyTokenRateLimits : *tl1500_35m_420m
381+ backends :
382+ fw-01 :
383+ << : *fwBackendPrimary
384+ modelNameOverride : " accounts/fireworks/models/qwen3-reranker-8b"
385+ fw-02 :
386+ << : *fwBackendSecondary
387+ modelNameOverride : " accounts/fireworks/models/qwen3-reranker-8b"
388+
389+ qwen3-embedding-8b :
390+ << : *fireworksModelRateLimits
391+ envoyTokenRateLimits : *tl1500_35m_420m
392+ backends :
393+ fw-01 :
394+ << : *fwBackendPrimary
395+ modelNameOverride : " accounts/fireworks/models/qwen3-embedding-8b"
396+ fw-02 :
397+ << : *fwBackendSecondary
398+ modelNameOverride : " accounts/fireworks/models/qwen3-embedding-8b"
399+
400+ kimi-k2p5 :
401+ << : *fireworksModelRateLimits
402+ envoyTokenRateLimits : *tl1500_35m_420m
403+ backends :
404+ fw-01 :
405+ << : *fwBackendPrimary
406+ modelNameOverride : " accounts/fireworks/models/kimi-k2p5"
407+ fw-02 :
408+ << : *fwBackendSecondary
409+ modelNameOverride : " accounts/fireworks/models/kimi-k2p5"
410+
411+ # playground-v2:
412+ # <<: *fireworksModelRateLimits
413+ # envoyTokenRateLimits: *tl1500_35m_420m
414+ # backends:
415+ # fw-01:
416+ # <<: *fwBackendPrimary
417+ # modelNameOverride: "accounts/fireworks/models/playground-v2-5-1024px-aesthetic"
418+ # fw-02:
419+ # <<: *fwBackendSecondary
420+ # modelNameOverride: "accounts/fireworks/models/playground-v2-5-1024px-aesthetic"
421+
422+ fireworks-asr-v2 :
423+ << : *fireworksModelRateLimits
424+ envoyTokenRateLimits : *tl1500_35m_420m
425+ backends :
426+ fw-01 :
427+ << : *fwBackendPrimary
428+ modelNameOverride : " accounts/fireworks/models/fireworks-asr-v2"
429+ fw-02 :
430+ << : *fwBackendSecondary
431+ modelNameOverride : " accounts/fireworks/models/fireworks-asr-v2"
432+
433+ whisper-v3 :
434+ << : *fireworksModelRateLimits
435+ envoyTokenRateLimits : *tl1500_35m_420m
436+ backends :
437+ fw-01 :
438+ << : *fwBackendPrimary
439+ modelNameOverride : " accounts/fireworks/models/whisper-v3"
440+ fw-02 :
441+ << : *fwBackendSecondary
442+ modelNameOverride : " accounts/fireworks/models/whisper-v3"
443+
444+ whisper-v3-turbo :
445+ << : *fireworksModelRateLimits
446+ envoyTokenRateLimits : *tl1500_35m_420m
447+ backends :
448+ fw-01 :
449+ << : *fwBackendPrimary
450+ modelNameOverride : " accounts/fireworks/models/whisper-v3-turbo"
451+ fw-02 :
452+ << : *fwBackendSecondary
453+ modelNameOverride : " accounts/fireworks/models/whisper-v3-turbo"
454+
455+ minimax-m2p5 :
456+ << : *fireworksModelRateLimits
457+ envoyTokenRateLimits : *tl1500_35m_420m
458+ backends :
459+ fw-01 :
460+ << : *fwBackendPrimary
461+ modelNameOverride : " accounts/fireworks/models/minimax-m2p5"
462+ fw-02 :
463+ << : *fwBackendSecondary
464+ modelNameOverride : " accounts/fireworks/models/minimax-m2p5"
465+
466+ # stable-diffusion-xl:
467+ # <<: *fireworksModelRateLimits
468+ # envoyTokenRateLimits: *tl1500_35m_420m
469+ # backends:
470+ # fw-01:
471+ # <<: *fwBackendPrimary
472+ # modelNameOverride: "accounts/fireworks/models/stable-diffusion-xl-1024-v1-0"
473+ # fw-02:
474+ # <<: *fwBackendSecondary
475+ # modelNameOverride: "accounts/fireworks/models/stable-diffusion-xl-1024-v1-0"
476+
367477 deepseek-v3p1-terminus :
368478 << : *fireworksModelRateLimits
369479 envoyTokenRateLimits : *tl1500_35m_420m
@@ -557,16 +667,16 @@ models:
557667 << : *vertexBackendSecondary
558668 modelNameOverride : " gemini-2.5-pro"
559669
560- gemini-2-5-pro-reasoning-1024 :
561- << : *geminiModelRateLimits
562- envoyTokenRateLimits : *tl1000_20m_240m
563- backends :
564- vertex-ai-01 :
565- << : *vertexBackendPrimary
566- modelNameOverride : " gemini-2.5-pro"
567- vertex-ai-02 :
568- << : *vertexBackendSecondary
569- modelNameOverride : " gemini-2.5-pro"
670+ # gemini-2-5-pro-reasoning-1024:
671+ # <<: *geminiModelRateLimits
672+ # envoyTokenRateLimits: *tl1000_20m_240m
673+ # backends:
674+ # vertex-ai-01:
675+ # <<: *vertexBackendPrimary
676+ # modelNameOverride: "gemini-2.5-pro"
677+ # vertex-ai-02:
678+ # <<: *vertexBackendSecondary
679+ # modelNameOverride: "gemini-2.5-pro"
570680
571681 gemini-3-pro-image-preview :
572682 << : *geminiModelRateLimits
@@ -611,13 +721,13 @@ models:
611721 << : *googleAiStudioBackendSecondary
612722 modelNameOverride : " gemini-3-flash-preview"
613723
614- gemini-3-flash-thinking :
615- << : *geminiModelRateLimits
616- envoyTokenRateLimits : *tl800_15m_180m
617- backends :
618- google-ai-studio-01 :
619- << : *googleAiStudioBackendPrimary
620- modelNameOverride : " gemini-3-flash-preview"
621- google-ai-studio-02 :
622- << : *googleAiStudioBackendSecondary
623- modelNameOverride : " gemini-3-flash-preview"
724+ # gemini-3-flash-thinking:
725+ # <<: *geminiModelRateLimits
726+ # envoyTokenRateLimits: *tl800_15m_180m
727+ # backends:
728+ # google-ai-studio-01:
729+ # <<: *googleAiStudioBackendPrimary
730+ # modelNameOverride: "gemini-3-flash-preview"
731+ # google-ai-studio-02:
732+ # <<: *googleAiStudioBackendSecondary
733+ # modelNameOverride: "gemini-3-flash-preview"
0 commit comments