Skip to content

Commit f426cd1

Browse files
Feat: Add new fireworks models (#41)
* feat: add TLS and HTTPS redirect configuration options
* feat(tls): enable HTTPS on Envoy Gateway with cert-manager and HTTP redirect
* feat(values): define HTTP and HTTPS Gateway listeners via values.yaml not helper.tpl
* feat: add configurable namespace for all namespaced resources
* feat: switch certificate to the envoy pod namespace
* chore: swapped issuer.enabled with issuer.create
* Fix HTTP to HTTPS redirection by adding hostname to HTTPRoute configuration
* Add api.ai.camer.digital hostname to HTTPRoute configuration
* Update modelNameOverride for gpt-image-1 OpenAI backends
* Add HTTPRoute configuration for new Fireworks image gen AI models
* Update Fireworks model configurations and add new models with rate limits
* Remove HTTPRoute configuration for image generation and update rate limits for token usage
* Comment out unused model configurations in values.yaml

---------

Co-authored-by: Stephane SEGNING LAMBOU <selastlambou@gmail.com>
1 parent dc95be6 commit f426cd1

File tree

1 file changed

+142
-32
lines changed

1 file changed

+142
-32
lines changed

charts/models/values.yaml

Lines changed: 142 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -32,7 +32,7 @@ backendTrafficPolicy:
3232
backendDefaults:
3333
fireworks: &fireworksBackend
3434
schema: OpenAI
35-
prefix: /inference/v1
35+
prefix: "/inference/v1"
3636
fqdn:
3737
hostname: api.fireworks.ai
3838
port: 443
@@ -213,7 +213,7 @@ backends:
213213
name: google-ai-studio-api-key-02
214214

215215
models:
216-
# Fireworks AI Models
216+
# Fireworks AI Models
217217
# qwen3-coder-480b-a35b-instruct:
218218
# <<: *fireworksModelRateLimits
219219
# backends:
@@ -343,16 +343,16 @@ models:
343343
<<: *fwBackendSecondary
344344
modelNameOverride: "accounts/fireworks/models/qwen3-235b-a22b"
345345

346-
glm-4p6:
347-
<<: *fireworksModelRateLimits
348-
envoyTokenRateLimits: *tl1500_35m_420m
349-
backends:
350-
fw-01:
351-
<<: *fwBackendPrimary
352-
modelNameOverride: "accounts/fireworks/models/glm-4p6"
353-
fw-02:
354-
<<: *fwBackendSecondary
355-
modelNameOverride: "accounts/fireworks/models/glm-4p6"
346+
# glm-4p6:
347+
# <<: *fireworksModelRateLimits
348+
# envoyTokenRateLimits: *tl1500_35m_420m
349+
# backends:
350+
# fw-01:
351+
# <<: *fwBackendPrimary
352+
# modelNameOverride: "accounts/fireworks/models/glm-4p6"
353+
# fw-02:
354+
# <<: *fwBackendSecondary
355+
# modelNameOverride: "accounts/fireworks/models/glm-4p6"
356356

357357
# glm-4p5:
358358
# <<: *fireworksModelRateLimits
@@ -364,6 +364,116 @@ models:
364364
# <<: *fwBackendSecondary
365365
# modelNameOverride: "accounts/fireworks/models/glm-4p5"
366366

367+
glm-5:
368+
<<: *fireworksModelRateLimits
369+
envoyTokenRateLimits: *tl1500_35m_420m
370+
backends:
371+
fw-01:
372+
<<: *fwBackendPrimary
373+
modelNameOverride: "accounts/fireworks/models/glm-5"
374+
fw-02:
375+
<<: *fwBackendSecondary
376+
modelNameOverride: "accounts/fireworks/models/glm-5"
377+
378+
qwen3-reranker-8b:
379+
<<: *fireworksModelRateLimits
380+
envoyTokenRateLimits: *tl1500_35m_420m
381+
backends:
382+
fw-01:
383+
<<: *fwBackendPrimary
384+
modelNameOverride: "accounts/fireworks/models/qwen3-reranker-8b"
385+
fw-02:
386+
<<: *fwBackendSecondary
387+
modelNameOverride: "accounts/fireworks/models/qwen3-reranker-8b"
388+
389+
qwen3-embedding-8b:
390+
<<: *fireworksModelRateLimits
391+
envoyTokenRateLimits: *tl1500_35m_420m
392+
backends:
393+
fw-01:
394+
<<: *fwBackendPrimary
395+
modelNameOverride: "accounts/fireworks/models/qwen3-embedding-8b"
396+
fw-02:
397+
<<: *fwBackendSecondary
398+
modelNameOverride: "accounts/fireworks/models/qwen3-embedding-8b"
399+
400+
kimi-k2p5:
401+
<<: *fireworksModelRateLimits
402+
envoyTokenRateLimits: *tl1500_35m_420m
403+
backends:
404+
fw-01:
405+
<<: *fwBackendPrimary
406+
modelNameOverride: "accounts/fireworks/models/kimi-k2p5"
407+
fw-02:
408+
<<: *fwBackendSecondary
409+
modelNameOverride: "accounts/fireworks/models/kimi-k2p5"
410+
411+
# playground-v2:
412+
# <<: *fireworksModelRateLimits
413+
# envoyTokenRateLimits: *tl1500_35m_420m
414+
# backends:
415+
# fw-01:
416+
# <<: *fwBackendPrimary
417+
# modelNameOverride: "accounts/fireworks/models/playground-v2-5-1024px-aesthetic"
418+
# fw-02:
419+
# <<: *fwBackendSecondary
420+
# modelNameOverride: "accounts/fireworks/models/playground-v2-5-1024px-aesthetic"
421+
422+
fireworks-asr-v2:
423+
<<: *fireworksModelRateLimits
424+
envoyTokenRateLimits: *tl1500_35m_420m
425+
backends:
426+
fw-01:
427+
<<: *fwBackendPrimary
428+
modelNameOverride: "accounts/fireworks/models/fireworks-asr-v2"
429+
fw-02:
430+
<<: *fwBackendSecondary
431+
modelNameOverride: "accounts/fireworks/models/fireworks-asr-v2"
432+
433+
whisper-v3:
434+
<<: *fireworksModelRateLimits
435+
envoyTokenRateLimits: *tl1500_35m_420m
436+
backends:
437+
fw-01:
438+
<<: *fwBackendPrimary
439+
modelNameOverride: "accounts/fireworks/models/whisper-v3"
440+
fw-02:
441+
<<: *fwBackendSecondary
442+
modelNameOverride: "accounts/fireworks/models/whisper-v3"
443+
444+
whisper-v3-turbo:
445+
<<: *fireworksModelRateLimits
446+
envoyTokenRateLimits: *tl1500_35m_420m
447+
backends:
448+
fw-01:
449+
<<: *fwBackendPrimary
450+
modelNameOverride: "accounts/fireworks/models/whisper-v3-turbo"
451+
fw-02:
452+
<<: *fwBackendSecondary
453+
modelNameOverride: "accounts/fireworks/models/whisper-v3-turbo"
454+
455+
minimax-m2p5:
456+
<<: *fireworksModelRateLimits
457+
envoyTokenRateLimits: *tl1500_35m_420m
458+
backends:
459+
fw-01:
460+
<<: *fwBackendPrimary
461+
modelNameOverride: "accounts/fireworks/models/minimax-m2p5"
462+
fw-02:
463+
<<: *fwBackendSecondary
464+
modelNameOverride: "accounts/fireworks/models/minimax-m2p5"
465+
466+
# stable-diffusion-xl:
467+
# <<: *fireworksModelRateLimits
468+
# envoyTokenRateLimits: *tl1500_35m_420m
469+
# backends:
470+
# fw-01:
471+
# <<: *fwBackendPrimary
472+
# modelNameOverride: "accounts/fireworks/models/stable-diffusion-xl-1024-v1-0"
473+
# fw-02:
474+
# <<: *fwBackendSecondary
475+
# modelNameOverride: "accounts/fireworks/models/stable-diffusion-xl-1024-v1-0"
476+
367477
deepseek-v3p1-terminus:
368478
<<: *fireworksModelRateLimits
369479
envoyTokenRateLimits: *tl1500_35m_420m
@@ -557,16 +667,16 @@ models:
557667
<<: *vertexBackendSecondary
558668
modelNameOverride: "gemini-2.5-pro"
559669

560-
gemini-2-5-pro-reasoning-1024:
561-
<<: *geminiModelRateLimits
562-
envoyTokenRateLimits: *tl1000_20m_240m
563-
backends:
564-
vertex-ai-01:
565-
<<: *vertexBackendPrimary
566-
modelNameOverride: "gemini-2.5-pro"
567-
vertex-ai-02:
568-
<<: *vertexBackendSecondary
569-
modelNameOverride: "gemini-2.5-pro"
670+
# gemini-2-5-pro-reasoning-1024:
671+
# <<: *geminiModelRateLimits
672+
# envoyTokenRateLimits: *tl1000_20m_240m
673+
# backends:
674+
# vertex-ai-01:
675+
# <<: *vertexBackendPrimary
676+
# modelNameOverride: "gemini-2.5-pro"
677+
# vertex-ai-02:
678+
# <<: *vertexBackendSecondary
679+
# modelNameOverride: "gemini-2.5-pro"
570680

571681
gemini-3-pro-image-preview:
572682
<<: *geminiModelRateLimits
@@ -611,13 +721,13 @@ models:
611721
<<: *googleAiStudioBackendSecondary
612722
modelNameOverride: "gemini-3-flash-preview"
613723

614-
gemini-3-flash-thinking:
615-
<<: *geminiModelRateLimits
616-
envoyTokenRateLimits: *tl800_15m_180m
617-
backends:
618-
google-ai-studio-01:
619-
<<: *googleAiStudioBackendPrimary
620-
modelNameOverride: "gemini-3-flash-preview"
621-
google-ai-studio-02:
622-
<<: *googleAiStudioBackendSecondary
623-
modelNameOverride: "gemini-3-flash-preview"
724+
# gemini-3-flash-thinking:
725+
# <<: *geminiModelRateLimits
726+
# envoyTokenRateLimits: *tl800_15m_180m
727+
# backends:
728+
# google-ai-studio-01:
729+
# <<: *googleAiStudioBackendPrimary
730+
# modelNameOverride: "gemini-3-flash-preview"
731+
# google-ai-studio-02:
732+
# <<: *googleAiStudioBackendSecondary
733+
# modelNameOverride: "gemini-3-flash-preview"

0 commit comments

Comments
 (0)