Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions qwen/qwen-3-30B-A3/config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Serving configuration for the Qwen3-30B-A3B-Instruct-2507 checkpoint.
# Child keys are indented under their parent so that `resources`, `trt_llm`,
# `build`, `checkpoint_repository` and `runtime` parse as mappings instead of
# null-valued keys followed by unrelated top-level keys.
model_name: qwen-30b-BIS
python_version: py39

# Compute requested for the deployment.
resources:
  accelerator: H100
  # Quoted on purpose: keeps the value a string rather than an integer.
  cpu: '1'
  memory: 10Gi
  use_gpu: true

trt_llm:
  build:
    base_model: decoder
    # Where the model weights are pulled from.
    checkpoint_repository:
      repo: Qwen/Qwen3-30B-A3B-Instruct-2507
      # NOTE(review): `main` is a moving branch ref, so two builds of this
      # file can pick up different weights. Consider pinning a commit SHA.
      revision: main
      source: HF
  inference_stack: v2
  runtime:
    enable_chunked_prefill: true
    max_batch_size: 128
    # NOTE(review): presumably the per-iteration token budget; it is far
    # smaller than max_seq_len, so long prompts likely depend on
    # enable_chunked_prefill staying true - confirm with the runtime docs.
    max_num_tokens: 8192
    # 131072 = 128 * 1024.
    max_seq_len: 131072
    # Same identifier as build.checkpoint_repository.repo.
    served_model_name: Qwen/Qwen3-30B-A3B-Instruct-2507
    # NOTE(review): 1 presumably means no tensor-parallel sharding; verify
    # that it matches the accelerator count requested under `resources`.
    tensor_parallel_size: 1
Loading