From 9caba3edb0239f51939d7736abc592e4cc809bc1 Mon Sep 17 00:00:00 2001
From: Dhruv Singal
Date: Mon, 31 Mar 2025 10:49:37 -0700
Subject: [PATCH] move from A100 to H100 default in config.yaml

---
 .../BEI-baai-bge-m3-embedding-dense/config.yaml             | 2 +-
 .../BEI-baai-bge-reranker-v2-m3-multilingual/config.yaml    | 2 +-
 .../BEI-snowflake-snowflake-arctic-embed-l-v2.0/config.yaml | 2 +-
 .../Briton-qwen-qwen2-57b-a14b-moe-int4/config.yaml         | 2 +-
 cogvlm/config.yaml                                          | 2 +-
 comfyui-truss/examples/anime-style-transfer/config.yaml     | 2 +-
 deepspeed-mii/config.yaml                                   | 2 +-
 gemma/gemma-2-27b-it-vllm/config.yaml                       | 2 +-
 gemma/gemma-2-9b-it-vllm/config.yaml                        | 2 +-
 llama/llama-2-13b-chat/config.yaml                          | 2 +-
 llama/llama-2-13b/config.yaml                               | 2 +-
 llama/llama-2-70b-chat/config.yaml                          | 2 +-
 llama/llama-2-70b/config.yaml                               | 2 +-
 llama/llama-3-8b-instruct/config.yaml                       | 2 +-
 llama/llama-3_1_70b-instruct/config.yaml                    | 2 +-
 llama/llama-3_2-11b-vision-instruct/config.yaml             | 2 +-
 llama/llama-7b-exllama-streaming/config.yaml                | 2 +-
 llama/llama-7b-exllama/config.yaml                          | 2 +-
 llava/llava-1.6-sgl/config.yaml                             | 2 +-
 llava/llava-v1.6-34b/config.yaml                            | 2 +-
 mistral/mixtral-8x22b-trt-int8-weights-only/config.yaml     | 4 ++--
 mistral/mixtral-8x22b/config.yaml                           | 2 +-
 .../config.yaml                                             | 2 +-
 mistral/mixtral-8x7b-instruct-trt-llm/config.yaml           | 2 +-
 mistral/mixtral-8x7b-instruct-vllm-a100-t-tp2/config.yaml   | 4 ++--
 mistral/mixtral-8x7b-instruct-vllm/config.yaml              | 2 +-
 mistral/pixtral-12b/config.yaml                             | 2 +-
 nous-capybara/nous-capybara-34b-openai/config.yaml          | 2 +-
 nous-capybara/nous-capybara-34b/config.yaml                 | 2 +-
 orpheus-tts/orpheus-tts-streaming/config.yaml               | 2 +-
 stable-diffusion/playground-v2-trt/config.yaml              | 4 ++--
 stable-diffusion/sdxl-lightning/config.yaml                 | 2 +-
 stable-diffusion/sdxl-lora-swapping/config.yaml             | 2 +-
 stable-diffusion/stable-diffusion-3-medium/config.yaml      | 2 +-
 stable-diffusion/stable-diffusion-xl-1.0-trt/config.yaml    | 2 +-
 stable-diffusion/stable-video-diffusion/config.yaml         | 2 +-
 templates/trt-llm/config.yaml                               | 2 +-
 ultravox/config.yaml                                        | 2 +-
 vllm/config.yaml                                            | 2 +-
 39 files changed, 42 insertions(+), 42 deletions(-)

diff --git a/11-embeddings-reranker-classification-tensorrt/BEI-baai-bge-m3-embedding-dense/config.yaml b/11-embeddings-reranker-classification-tensorrt/BEI-baai-bge-m3-embedding-dense/config.yaml
index 4be9d920f..ba68cf653 100644
--- a/11-embeddings-reranker-classification-tensorrt/BEI-baai-bge-m3-embedding-dense/config.yaml
+++ b/11-embeddings-reranker-classification-tensorrt/BEI-baai-bge-m3-embedding-dense/config.yaml
@@ -10,7 +10,7 @@ model_name: BEI-baai-bge-m3-embedding-dense-truss-example
 python_version: py39
 requirements: []
 resources:
-  accelerator: A100
+  accelerator: H100
   cpu: '1'
   memory: 10Gi
   use_gpu: true
diff --git a/11-embeddings-reranker-classification-tensorrt/BEI-baai-bge-reranker-v2-m3-multilingual/config.yaml b/11-embeddings-reranker-classification-tensorrt/BEI-baai-bge-reranker-v2-m3-multilingual/config.yaml
index b172841fc..defae307a 100644
--- a/11-embeddings-reranker-classification-tensorrt/BEI-baai-bge-reranker-v2-m3-multilingual/config.yaml
+++ b/11-embeddings-reranker-classification-tensorrt/BEI-baai-bge-reranker-v2-m3-multilingual/config.yaml
@@ -15,7 +15,7 @@ model_name: BEI-baai-bge-reranker-v2-m3-multilingual-truss-example
 python_version: py39
 requirements: []
 resources:
-  accelerator: A100
+  accelerator: H100
   cpu: '1'
   memory: 10Gi
   use_gpu: true
diff --git a/11-embeddings-reranker-classification-tensorrt/BEI-snowflake-snowflake-arctic-embed-l-v2.0/config.yaml b/11-embeddings-reranker-classification-tensorrt/BEI-snowflake-snowflake-arctic-embed-l-v2.0/config.yaml
index 9cae68b6f..b1432b798 100644
--- a/11-embeddings-reranker-classification-tensorrt/BEI-snowflake-snowflake-arctic-embed-l-v2.0/config.yaml
+++ b/11-embeddings-reranker-classification-tensorrt/BEI-snowflake-snowflake-arctic-embed-l-v2.0/config.yaml
@@ -10,7 +10,7 @@ model_name: BEI-snowflake-snowflake-arctic-embed-l-v2.0-truss-example
 python_version: py39
 requirements: []
 resources:
-  accelerator: A100
+  accelerator: H100
   cpu: '1'
   memory: 10Gi
   use_gpu: true
diff --git a/11-embeddings-reranker-classification-tensorrt/Briton-qwen-qwen2-57b-a14b-moe-int4/config.yaml b/11-embeddings-reranker-classification-tensorrt/Briton-qwen-qwen2-57b-a14b-moe-int4/config.yaml
index 65ff3ea8d..adc474279 100644
--- a/11-embeddings-reranker-classification-tensorrt/Briton-qwen-qwen2-57b-a14b-moe-int4/config.yaml
+++ b/11-embeddings-reranker-classification-tensorrt/Briton-qwen-qwen2-57b-a14b-moe-int4/config.yaml
@@ -16,7 +16,7 @@ model_name: Briton-qwen-qwen2-57b-a14b-moe-int4-truss-example
 python_version: py39
 requirements: []
 resources:
-  accelerator: A100
+  accelerator: H100
   cpu: '1'
   memory: 10Gi
   use_gpu: true
diff --git a/cogvlm/config.yaml b/cogvlm/config.yaml
index 83591b309..c04411e96 100644
--- a/cogvlm/config.yaml
+++ b/cogvlm/config.yaml
@@ -13,7 +13,7 @@ requirements:
 - xformers==0.0.22
 - accelerate==0.25.0
 resources:
-  accelerator: A100
+  accelerator: H100
   cpu: '3'
   memory: 15Gi
   use_gpu: true
diff --git a/comfyui-truss/examples/anime-style-transfer/config.yaml b/comfyui-truss/examples/anime-style-transfer/config.yaml
index 04442c3d3..d16108e21 100644
--- a/comfyui-truss/examples/anime-style-transfer/config.yaml
+++ b/comfyui-truss/examples/anime-style-transfer/config.yaml
@@ -20,7 +20,7 @@ requirements:
 - accelerate==0.23.0
 - opencv-python
 resources:
-  accelerator: A100
+  accelerator: H100
   use_gpu: true
 secrets: {}
 system_packages:
diff --git a/deepspeed-mii/config.yaml b/deepspeed-mii/config.yaml
index 44f423609..93d8f9887 100644
--- a/deepspeed-mii/config.yaml
+++ b/deepspeed-mii/config.yaml
@@ -18,7 +18,7 @@ python_version: py311
 requirements:
 - deepspeed-mii==0.1.1
 resources:
-  accelerator: A100
+  accelerator: H100
   cpu: '3'
   memory: 14Gi
   use_gpu: true
diff --git a/gemma/gemma-2-27b-it-vllm/config.yaml b/gemma/gemma-2-27b-it-vllm/config.yaml
index 26593477f..047a137f4 100644
--- a/gemma/gemma-2-27b-it-vllm/config.yaml
+++ b/gemma/gemma-2-27b-it-vllm/config.yaml
@@ -9,7 +9,7 @@ requirements:
 - vllm==0.5.1
 - https://github.com/flashinfer-ai/flashinfer/releases/download/v0.0.8/flashinfer-0.0.8+cu121torch2.3-cp311-cp311-linux_x86_64.whl
 resources:
-  accelerator: A100
+  accelerator: H100
   use_gpu: true
 runtime:
   predict_concurrency: 128
diff --git a/gemma/gemma-2-9b-it-vllm/config.yaml b/gemma/gemma-2-9b-it-vllm/config.yaml
index d6b1306a5..168386357 100644
--- a/gemma/gemma-2-9b-it-vllm/config.yaml
+++ b/gemma/gemma-2-9b-it-vllm/config.yaml
@@ -8,7 +8,7 @@ requirements:
 - vllm==0.5.1
 - https://github.com/flashinfer-ai/flashinfer/releases/download/v0.0.8/flashinfer-0.0.8+cu121torch2.3-cp311-cp311-linux_x86_64.whl
 resources:
-  accelerator: A100
+  accelerator: H100
   use_gpu: true
 runtime:
   predict_concurrency: 128
diff --git a/llama/llama-2-13b-chat/config.yaml b/llama/llama-2-13b-chat/config.yaml
index b744f1598..031fe4eee 100644
--- a/llama/llama-2-13b-chat/config.yaml
+++ b/llama/llama-2-13b-chat/config.yaml
@@ -34,7 +34,7 @@ requirements:
 - torch==2.0.1
 - transformers==4.32.1
 resources:
-  accelerator: A100
+  accelerator: H100
   cpu: '3'
   memory: 14Gi
   use_gpu: true
diff --git a/llama/llama-2-13b/config.yaml b/llama/llama-2-13b/config.yaml
index 6a193610e..9bfa371e3 100644
--- a/llama/llama-2-13b/config.yaml
+++ b/llama/llama-2-13b/config.yaml
@@ -22,7 +22,7 @@ requirements:
 - torch==2.0.1
 - transformers==4.32.1
 resources:
-  accelerator: A100:1
+  accelerator: H100:1
   cpu: '3'
   memory: 14Gi
   use_gpu: true
diff --git a/llama/llama-2-70b-chat/config.yaml b/llama/llama-2-70b-chat/config.yaml
index 5f258010f..c94f7921c 100644
--- a/llama/llama-2-70b-chat/config.yaml
+++ b/llama/llama-2-70b-chat/config.yaml
@@ -33,7 +33,7 @@ requirements:
 - torch==2.0.1
 - transformers==4.32.1
 resources:
-  accelerator: A100:2
+  accelerator: H100:2
   cpu: '3'
   memory: 14Gi
   use_gpu: true
diff --git a/llama/llama-2-70b/config.yaml b/llama/llama-2-70b/config.yaml
index 79bc91a6e..6762f1883 100644
--- a/llama/llama-2-70b/config.yaml
+++ b/llama/llama-2-70b/config.yaml
@@ -22,7 +22,7 @@ requirements:
 - torch==2.0.1
 - transformers==4.32.1
 resources:
-  accelerator: A100:2
+  accelerator: H100:2
   cpu: '3'
   memory: 14Gi
   use_gpu: true
diff --git a/llama/llama-3-8b-instruct/config.yaml b/llama/llama-3-8b-instruct/config.yaml
index 04a0c9bc2..62fe05787 100644
--- a/llama/llama-3-8b-instruct/config.yaml
+++ b/llama/llama-3-8b-instruct/config.yaml
@@ -16,7 +16,7 @@ requirements:
 - transformers
 - torch
 resources:
-  accelerator: A100
+  accelerator: H100
   use_gpu: true
 secrets:
   hf_access_token: "your-hf-access-token"
diff --git a/llama/llama-3_1_70b-instruct/config.yaml b/llama/llama-3_1_70b-instruct/config.yaml
index a6776abf7..97d82eae8 100644
--- a/llama/llama-3_1_70b-instruct/config.yaml
+++ b/llama/llama-3_1_70b-instruct/config.yaml
@@ -7,7 +7,7 @@ requirements:
 - vllm==0.5.3post1
 - accelerate
 resources:
-  accelerator: A100:4
+  accelerator: H100:4
   use_gpu: true
 runtime:
   predict_concurrency: 128
diff --git a/llama/llama-3_2-11b-vision-instruct/config.yaml b/llama/llama-3_2-11b-vision-instruct/config.yaml
index 936cf9353..5f6b2d761 100644
--- a/llama/llama-3_2-11b-vision-instruct/config.yaml
+++ b/llama/llama-3_2-11b-vision-instruct/config.yaml
@@ -31,7 +31,7 @@ docker_server:
   predict_endpoint: /v1/chat/completions
   server_port: 8000
 resources:
-  accelerator: A100
+  accelerator: H100
   use_gpu: true
 model_name: Llama 3.2 11B Vision Instruct
 secrets:
diff --git a/llama/llama-7b-exllama-streaming/config.yaml b/llama/llama-7b-exllama-streaming/config.yaml
index 774b328cd..3c2d6e5fb 100644
--- a/llama/llama-7b-exllama-streaming/config.yaml
+++ b/llama/llama-7b-exllama-streaming/config.yaml
@@ -8,7 +8,7 @@ python_version: py311
 requirements:
 - exllamav2==0.0.5
 resources:
-  accelerator: A100
+  accelerator: H100
   cpu: '1'
   memory: 2Gi
   use_gpu: true
diff --git a/llama/llama-7b-exllama/config.yaml b/llama/llama-7b-exllama/config.yaml
index 00b404063..c22d55f68 100644
--- a/llama/llama-7b-exllama/config.yaml
+++ b/llama/llama-7b-exllama/config.yaml
@@ -8,7 +8,7 @@ python_version: py311
 requirements:
 - exllamav2==0.0.5
 resources:
-  accelerator: A100
+  accelerator: H100
   cpu: '1'
   memory: 2Gi
   use_gpu: true
diff --git a/llava/llava-1.6-sgl/config.yaml b/llava/llava-1.6-sgl/config.yaml
index ae2eb6956..130725075 100644
--- a/llava/llava-1.6-sgl/config.yaml
+++ b/llava/llava-1.6-sgl/config.yaml
@@ -5,7 +5,7 @@ python_version: py310
 requirements: []
 requirements_file: ./requirements.txt
 resources:
-  accelerator: A100
+  accelerator: H100
   use_gpu: true
 runtime:
   predict_concurrency: 128
diff --git a/llava/llava-v1.6-34b/config.yaml b/llava/llava-v1.6-34b/config.yaml
index a5beab7d8..b96056302 100644
--- a/llava/llava-v1.6-34b/config.yaml
+++ b/llava/llava-v1.6-34b/config.yaml
@@ -5,7 +5,7 @@ python_version: py311
 requirements:
 - git+https://github.com/haotian-liu/LLaVA.git
 resources:
-  accelerator: A100
+  accelerator: H100
   use_gpu: true
 secrets: {}
 system_packages: []
diff --git a/mistral/mixtral-8x22b-trt-int8-weights-only/config.yaml b/mistral/mixtral-8x22b-trt-int8-weights-only/config.yaml
index c6b5e6a8b..fbde5f010 100644
--- a/mistral/mixtral-8x22b-trt-int8-weights-only/config.yaml
+++ b/mistral/mixtral-8x22b-trt-int8-weights-only/config.yaml
@@ -8,7 +8,7 @@ external_package_dirs: []
 model_metadata:
   avatar_url: https://cdn.baseten.co/production/static/explore/mistral_logo.png
   cover_image_url: https://cdn.baseten.co/production/static/explore/mistral.png
-  engine_repository: baseten/mixtral-8x22B_i60000_o4000_bs2_tp4_int8_weights_only_A100-tllm_0.9.0.dev2024022000
+  engine_repository: baseten/mixtral-8x22B_i60000_o4000_bs2_tp4_int8_weights_only_H100-tllm_0.9.0.dev2024022000
   example_model_input:
     max_tokens: 512
     messages:
@@ -31,7 +31,7 @@ requirements:
 - tritonclient[all]
 - transformers==4.42.3
 resources:
-  accelerator: A100:4
+  accelerator: H100:4
   use_gpu: true
 runtime:
   num_workers: 1
diff --git a/mistral/mixtral-8x22b/config.yaml b/mistral/mixtral-8x22b/config.yaml
index d469b4b35..31b4bf636 100644
--- a/mistral/mixtral-8x22b/config.yaml
+++ b/mistral/mixtral-8x22b/config.yaml
@@ -16,7 +16,7 @@ requirements:
 - transformers==4.42.3
 - torch==2.2.0
 resources:
-  accelerator: A100:4
+  accelerator: H100:4
   use_gpu: true
 secrets:
   hf_access_token: "ENTER HF ACCESS TOKEN HERE"
diff --git a/mistral/mixtral-8x7b-instruct-trt-llm-weights-only-quant/config.yaml b/mistral/mixtral-8x7b-instruct-trt-llm-weights-only-quant/config.yaml
index cfafdd705..0db059400 100644
--- a/mistral/mixtral-8x7b-instruct-trt-llm-weights-only-quant/config.yaml
+++ b/mistral/mixtral-8x7b-instruct-trt-llm-weights-only-quant/config.yaml
@@ -32,7 +32,7 @@ requirements:
 - tritonclient[all]
 - transformers==4.42.3
 resources:
-  accelerator: A100
+  accelerator: H100
   use_gpu: true
 runtime:
   num_workers: 1
diff --git a/mistral/mixtral-8x7b-instruct-trt-llm/config.yaml b/mistral/mixtral-8x7b-instruct-trt-llm/config.yaml
index 15bff9bab..e342b1e8e 100644
--- a/mistral/mixtral-8x7b-instruct-trt-llm/config.yaml
+++ b/mistral/mixtral-8x7b-instruct-trt-llm/config.yaml
@@ -31,7 +31,7 @@ requirements:
 - tritonclient[all]
 - transformers==4.42.3
 resources:
-  accelerator: A100:2
+  accelerator: H100:2
   use_gpu: true
 runtime:
   num_workers: 1
diff --git a/mistral/mixtral-8x7b-instruct-vllm-a100-t-tp2/config.yaml b/mistral/mixtral-8x7b-instruct-vllm-a100-t-tp2/config.yaml
index ccc476bea..63b6a5c4d 100644
--- a/mistral/mixtral-8x7b-instruct-vllm-a100-t-tp2/config.yaml
+++ b/mistral/mixtral-8x7b-instruct-vllm-a100-t-tp2/config.yaml
@@ -1,11 +1,11 @@
 environment_variables: {}
 external_package_dirs: []
-model_name: Mixtral 8x7B — VLLM TP2 — A100:2
+model_name: Mixtral 8x7B — VLLM TP2 — H100:2
 python_version: py310
 requirements:
 - vllm
 resources:
-  accelerator: A100:2
+  accelerator: H100:2
   use_gpu: true
 runtime:
   predict_concurrency: 128
diff --git a/mistral/mixtral-8x7b-instruct-vllm/config.yaml b/mistral/mixtral-8x7b-instruct-vllm/config.yaml
index f219fc452..3ea5c07d7 100644
--- a/mistral/mixtral-8x7b-instruct-vllm/config.yaml
+++ b/mistral/mixtral-8x7b-instruct-vllm/config.yaml
@@ -5,7 +5,7 @@ python_version: py310
 requirements:
 - vllm==0.2.5
 resources:
-  accelerator: A100:2
+  accelerator: H100:2
   use_gpu: true
 runtime:
   predict_concurrency: 128
diff --git a/mistral/pixtral-12b/config.yaml b/mistral/pixtral-12b/config.yaml
index 5e1d0dba8..9f161118d 100644
--- a/mistral/pixtral-12b/config.yaml
+++ b/mistral/pixtral-12b/config.yaml
@@ -39,5 +39,5 @@ secrets:
 requirements:
 - vllm==0.6.1
 resources:
-  accelerator: A100
+  accelerator: H100
   use_gpu: true
diff --git a/nous-capybara/nous-capybara-34b-openai/config.yaml b/nous-capybara/nous-capybara-34b-openai/config.yaml
index 23e4843be..55fb195af 100644
--- a/nous-capybara/nous-capybara-34b-openai/config.yaml
+++ b/nous-capybara/nous-capybara-34b-openai/config.yaml
@@ -15,7 +15,7 @@ requirements:
 - scipy==1.11.4
 - sentencepiece==0.1.99
 resources:
-  accelerator: A100
+  accelerator: H100
   cpu: '3'
   memory: 20Gi
   use_gpu: true
diff --git a/nous-capybara/nous-capybara-34b/config.yaml b/nous-capybara/nous-capybara-34b/config.yaml
index fc567d33e..be6124443 100644
--- a/nous-capybara/nous-capybara-34b/config.yaml
+++ b/nous-capybara/nous-capybara-34b/config.yaml
@@ -14,7 +14,7 @@ requirements:
 - scipy==1.11.4
 - sentencepiece==0.1.99
 resources:
-  accelerator: A100
+  accelerator: H100
   cpu: '3'
   memory: 20Gi
   use_gpu: true
diff --git a/orpheus-tts/orpheus-tts-streaming/config.yaml b/orpheus-tts/orpheus-tts-streaming/config.yaml
index 05be50da4..f98ec03f0 100644
--- a/orpheus-tts/orpheus-tts-streaming/config.yaml
+++ b/orpheus-tts/orpheus-tts-streaming/config.yaml
@@ -10,7 +10,7 @@ requirements:
 - huggingface_hub[hf_transfer]
 - hf_transfer==0.1.9
 resources:
-  accelerator: A100
+  accelerator: H100
   # accelerator: H100_40GB
   use_gpu: true
 runtime:
diff --git a/stable-diffusion/playground-v2-trt/config.yaml b/stable-diffusion/playground-v2-trt/config.yaml
index 249e13d4e..412be96ac 100644
--- a/stable-diffusion/playground-v2-trt/config.yaml
+++ b/stable-diffusion/playground-v2-trt/config.yaml
@@ -6,7 +6,7 @@ environment_variables:
   HF_HUB_ENABLE_HF_TRANSFER: 1
 external_package_dirs: []
 model_cache:
-- repo_id: baseten/playground-v2-trt-8.6.1.post1-engine-A100
+- repo_id: baseten/playground-v2-trt-8.6.1.post1-engine-H100
 - allow_patterns:
   - config.json
   - diffusion_pytorch_model.safetensors
@@ -42,7 +42,7 @@ requirements:
 - --extra-index-url https://pypi.nvidia.com
 - tensorrt==8.6.1.post1
 resources:
-  accelerator: A100
+  accelerator: H100
   use_gpu: true
 runtime:
   predict_concurrency: 1
diff --git a/stable-diffusion/sdxl-lightning/config.yaml b/stable-diffusion/sdxl-lightning/config.yaml
index 847f14942..16afe6e0f 100644
--- a/stable-diffusion/sdxl-lightning/config.yaml
+++ b/stable-diffusion/sdxl-lightning/config.yaml
@@ -18,7 +18,7 @@ requirements:
 - xformers==0.0.22
 - accelerate==0.24.1
 resources:
-  accelerator: A100
+  accelerator: H100
   use_gpu: true
 secrets: {}
 system_packages: []
diff --git a/stable-diffusion/sdxl-lora-swapping/config.yaml b/stable-diffusion/sdxl-lora-swapping/config.yaml
index e78e10fba..ebf3d984b 100644
--- a/stable-diffusion/sdxl-lora-swapping/config.yaml
+++ b/stable-diffusion/sdxl-lora-swapping/config.yaml
@@ -15,7 +15,7 @@ requirements:
 - opencv-python==4.8.0.76
 - diffusers==0.21.2
 resources:
-  accelerator: A100
+  accelerator: H100
   cpu: 3500m
   memory: 20Gi
   use_gpu: true
diff --git a/stable-diffusion/stable-diffusion-3-medium/config.yaml b/stable-diffusion/stable-diffusion-3-medium/config.yaml
index e313f612c..bf5de3c9f 100644
--- a/stable-diffusion/stable-diffusion-3-medium/config.yaml
+++ b/stable-diffusion/stable-diffusion-3-medium/config.yaml
@@ -13,7 +13,7 @@ requirements:
 - sentencepiece
 - protobuf
 resources:
-  accelerator: A100
+  accelerator: H100
   use_gpu: true
 secrets:
   hf_access_token: ""
diff --git a/stable-diffusion/stable-diffusion-xl-1.0-trt/config.yaml b/stable-diffusion/stable-diffusion-xl-1.0-trt/config.yaml
index 210586c7d..739193844 100644
--- a/stable-diffusion/stable-diffusion-xl-1.0-trt/config.yaml
+++ b/stable-diffusion/stable-diffusion-xl-1.0-trt/config.yaml
@@ -49,7 +49,7 @@ requirements:
 - --extra-index-url https://pypi.nvidia.com
 - tensorrt==8.6.1.post1
 resources:
-  accelerator: A100
+  accelerator: H100
   use_gpu: true
 runtime:
   predict_concurrency: 1
diff --git a/stable-diffusion/stable-video-diffusion/config.yaml b/stable-diffusion/stable-video-diffusion/config.yaml
index 3dc105678..8f9695a88 100644
--- a/stable-diffusion/stable-video-diffusion/config.yaml
+++ b/stable-diffusion/stable-video-diffusion/config.yaml
@@ -29,7 +29,7 @@ requirements:
 - hf_transfer==0.1.4
 - git+https://github.com/Stability-AI/generative-models.git@059d8e9cd9c55aea1ef2ece39abf605efb8b7cc9
 resources:
-  accelerator: A100
+  accelerator: H100
   cpu: '4'
   memory: 16Gi
   use_gpu: true
diff --git a/templates/trt-llm/config.yaml b/templates/trt-llm/config.yaml
index cad99b5db..be32e63f8 100644
--- a/templates/trt-llm/config.yaml
+++ b/templates/trt-llm/config.yaml
@@ -12,7 +12,7 @@ python_version: py311
 requirements:
 - tritonclient[all]
 resources:
-  accelerator: A100
+  accelerator: H100
   use_gpu: true
 runtime:
   predict_concurrency: 256
diff --git a/ultravox/config.yaml b/ultravox/config.yaml
index b56bf0c5c..0b2554a1b 100644
--- a/ultravox/config.yaml
+++ b/ultravox/config.yaml
@@ -14,7 +14,7 @@ runtime:
 requirements:
 - httpx
 resources:
-  accelerator: A100
+  accelerator: H100
   use_gpu: true
 secrets: {}
 system_packages:
diff --git a/vllm/config.yaml b/vllm/config.yaml
index bd98b7e40..4e2da6a1a 100644
--- a/vllm/config.yaml
+++ b/vllm/config.yaml
@@ -11,7 +11,7 @@ model_metadata:
 requirements:
 - vllm==0.5.4
 resources:
-  accelerator: A100
+  accelerator: H100
   use_gpu: true
 runtime:
   predict_concurrency: 128
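
For reference, the resources stanza these configs converge on after the patch looks like the following. This is a representative sketch rather than the contents of any single file: cpu and memory values vary per model, and multi-GPU deployments append a count suffix to the accelerator name.

resources:
  accelerator: H100   # count suffix requests multiple GPUs, e.g. H100:2 or H100:4
  use_gpu: true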