From e6fb2a51e264378e042e6e00c202a4b2f98e8d09 Mon Sep 17 00:00:00 2001
From: Bola Malek
Date: Fri, 1 Aug 2025 16:09:59 -0700
Subject: [PATCH] Add Qwen3 with BIS

---
Reviewer notes (this region is ignored when the patch is applied):
  * Creates a single new file, qwen/qwen-3-30B-A3/config.yaml (22 lines).
  * The trt_llm build section reads its checkpoint from repo
    Qwen/Qwen3-30B-A3B-Instruct-2507, revision "main", source "HF".
  * NOTE(review): "main" looks like a moving ref; confirm whether a pinned
    revision is wanted for reproducible builds.

 qwen/qwen-3-30B-A3/config.yaml | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)
 create mode 100644 qwen/qwen-3-30B-A3/config.yaml

diff --git a/qwen/qwen-3-30B-A3/config.yaml b/qwen/qwen-3-30B-A3/config.yaml
new file mode 100644
index 000000000..dea620649
--- /dev/null
+++ b/qwen/qwen-3-30B-A3/config.yaml
@@ -0,0 +1,22 @@
+model_name: qwen-30b-BIS
+python_version: py39
+resources:
+  accelerator: H100
+  cpu: '1'
+  memory: 10Gi
+  use_gpu: true
+trt_llm:
+  build:
+    base_model: decoder
+    checkpoint_repository:
+      repo: Qwen/Qwen3-30B-A3B-Instruct-2507
+      revision: main
+      source: HF
+  inference_stack: v2
+  runtime:
+    enable_chunked_prefill: true
+    max_batch_size: 128
+    max_num_tokens: 8192
+    max_seq_len: 131072
+    served_model_name: Qwen/Qwen3-30B-A3B-Instruct-2507
+    tensor_parallel_size: 1