Skip to content

Commit 555ef42

Browse files
committed
rename base_model: llama to decoder
1 parent d943eed commit 555ef42

File tree

35 files changed

+336
-328
lines changed

35 files changed

+336
-328
lines changed

custom-engine-builder-control/README.md

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -15,8 +15,8 @@ Optionally, you can also enable:
1515

1616

1717
# Examples:
18-
This deployment is to showcase the option to generate multiple suffixes based on a previous request.
19-
We are going to hit the KV-Cache of a previous request.
18+
This deployment is to showcase the option to generate multiple suffixes based on a previous request.
19+
We are going to hit the KV-Cache of a previous request.
2020

2121
## Deployment with Truss
2222

@@ -106,7 +106,7 @@ resources:
106106
use_gpu: true
107107
trt_llm:
108108
build:
109-
base_model: qwen
109+
base_model: decoder
110110
checkpoint_repository:
111111
repo: Qwen/Qwen3-8B
112112
revision: main

custom-engine-builder-control/config.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@ resources:
2525
use_gpu: true
2626
trt_llm:
2727
build:
28-
base_model: qwen
28+
base_model: decoder
2929
checkpoint_repository:
3030
repo: Qwen/Qwen3-8B
3131
revision: main

deepseek/engine-deepseek-r1-distill-llama-70b/config.yaml

Lines changed: 18 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -4,35 +4,37 @@ external_package_dirs: []
44
model_metadata:
55
tags:
66
- openai-compatible
7-
example_model_input: {
8-
messages: [
9-
{
10-
role: "user",
11-
content: "Which is heavier, a pound of bricks or a pound of feathers?"
12-
}
13-
],
14-
stream: true,
15-
max_tokens: 1024,
16-
temperature: 0.6,
17-
top_p: 1.0,
18-
top_k: 40,
19-
frequency_penalty: 1
20-
}
7+
example_model_input:
8+
{
9+
messages:
10+
[
11+
{
12+
role: "user",
13+
content: "Which is heavier, a pound of bricks or a pound of feathers?",
14+
},
15+
],
16+
stream: true,
17+
max_tokens: 1024,
18+
temperature: 0.6,
19+
top_p: 1.0,
20+
top_k: 40,
21+
frequency_penalty: 1,
22+
}
2123
repo_id: deepseek-ai/DeepSeek-R1-Distill-Llama-70B
2224
model_name: DeepSeek R1 Distill Llama 70B
2325
python_version: py39
2426
requirements: []
2527
resources:
2628
accelerator: H100:2
27-
cpu: '1'
29+
cpu: "1"
2830
memory: 24Gi
2931
use_gpu: true
3032
secrets:
3133
hf_access_token: set token in baseten workspace
3234
system_packages: []
3335
trt_llm:
3436
build:
35-
base_model: llama
37+
base_model: decoder
3638
checkpoint_repository:
3739
repo: deepseek-ai/DeepSeek-R1-Distill-Llama-70B
3840
source: HF

deepseek/engine-deepseek-r1-distill-llama-8b/config.yaml

Lines changed: 18 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -4,35 +4,37 @@ external_package_dirs: []
44
model_metadata:
55
tags:
66
- openai-compatible
7-
example_model_input: {
8-
messages: [
9-
{
10-
role: "user",
11-
content: "Which is heavier, a pound of bricks or a pound of feathers?"
12-
}
13-
],
14-
stream: true,
15-
max_tokens: 1024,
16-
temperature: 0.6,
17-
top_p: 1.0,
18-
top_k: 40,
19-
frequency_penalty: 1
20-
}
7+
example_model_input:
8+
{
9+
messages:
10+
[
11+
{
12+
role: "user",
13+
content: "Which is heavier, a pound of bricks or a pound of feathers?",
14+
},
15+
],
16+
stream: true,
17+
max_tokens: 1024,
18+
temperature: 0.6,
19+
top_p: 1.0,
20+
top_k: 40,
21+
frequency_penalty: 1,
22+
}
2123
repo_id: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
2224
model_name: DeepSeek R1 Distill Llama 8B
2325
python_version: py39
2426
requirements: []
2527
resources:
2628
accelerator: H100_40GB
27-
cpu: '1'
29+
cpu: "1"
2830
memory: 24Gi
2931
use_gpu: true
3032
secrets:
3133
hf_access_token: set token in baseten workspace
3234
system_packages: []
3335
trt_llm:
3436
build:
35-
base_model: llama
37+
base_model: decoder
3638
checkpoint_repository:
3739
repo: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
3840
source: HF

deepseek/engine-deepseek-r1-distill-qwen-14b/config.yaml

Lines changed: 15 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -4,31 +4,33 @@ external_package_dirs: []
44
model_metadata:
55
tags:
66
- openai-compatible
7-
example_model_input: {
8-
messages: [
9-
{
10-
role: "user",
11-
content: "Which is heavier, a pound of bricks or a pound of feathers?"
12-
}
13-
],
14-
stream: true,
15-
max_tokens: 1024,
16-
temperature: 0.6
17-
}
7+
example_model_input:
8+
{
9+
messages:
10+
[
11+
{
12+
role: "user",
13+
content: "Which is heavier, a pound of bricks or a pound of feathers?",
14+
},
15+
],
16+
stream: true,
17+
max_tokens: 1024,
18+
temperature: 0.6,
19+
}
1820
repo_id: deepseek-ai/DeepSeek-R1-Distill-Qwen-14B
1921
model_name: DeepSeek R1 Distill Qwen 14B
2022
python_version: py39
2123
requirements: []
2224
resources:
2325
accelerator: H100_40GB
24-
cpu: '1'
26+
cpu: "1"
2527
memory: 24Gi
2628
use_gpu: true
2729
secrets: {}
2830
system_packages: []
2931
trt_llm:
3032
build:
31-
base_model: qwen
33+
base_model: decoder
3234
checkpoint_repository:
3335
repo: deepseek-ai/DeepSeek-R1-Distill-Qwen-14B
3436
source: HF

deepseek/engine-deepseek-r1-distill-qwen-32b/config.yaml

Lines changed: 15 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -4,31 +4,33 @@ external_package_dirs: []
44
model_metadata:
55
tags:
66
- openai-compatible
7-
example_model_input: {
8-
messages: [
9-
{
10-
role: "user",
11-
content: "Which is heavier, a pound of bricks or a pound of feathers?"
12-
}
13-
],
14-
stream: true,
15-
max_tokens: 1024,
16-
temperature: 0.6
17-
}
7+
example_model_input:
8+
{
9+
messages:
10+
[
11+
{
12+
role: "user",
13+
content: "Which is heavier, a pound of bricks or a pound of feathers?",
14+
},
15+
],
16+
stream: true,
17+
max_tokens: 1024,
18+
temperature: 0.6,
19+
}
1820
repo_id: deepseek-ai/DeepSeek-R1-Distill-Qwen-32B
1921
model_name: DeepSeek R1 Distill Qwen 32B
2022
python_version: py39
2123
requirements: []
2224
resources:
2325
accelerator: H100
24-
cpu: '1'
26+
cpu: "1"
2527
memory: 24Gi
2628
use_gpu: true
2729
secrets: {}
2830
system_packages: []
2931
trt_llm:
3032
build:
31-
base_model: qwen
33+
base_model: decoder
3234
checkpoint_repository:
3335
repo: deepseek-ai/DeepSeek-R1-Distill-Qwen-32B
3436
source: HF

deepseek/engine-deepseek-r1-distill-qwen-7b/config.yaml

Lines changed: 15 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -4,31 +4,33 @@ external_package_dirs: []
44
model_metadata:
55
tags:
66
- openai-compatible
7-
example_model_input: {
8-
messages: [
9-
{
10-
role: "user",
11-
content: "Which is heavier, a pound of bricks or a pound of feathers?"
12-
}
13-
],
14-
stream: true,
15-
max_tokens: 1024,
16-
temperature: 0.6
17-
}
7+
example_model_input:
8+
{
9+
messages:
10+
[
11+
{
12+
role: "user",
13+
content: "Which is heavier, a pound of bricks or a pound of feathers?",
14+
},
15+
],
16+
stream: true,
17+
max_tokens: 1024,
18+
temperature: 0.6,
19+
}
1820
repo_id: deepseek-ai/DeepSeek-R1-Distill-Qwen-7B
1921
model_name: DeepSeek R1 Distill Qwen 7B
2022
python_version: py39
2123
requirements: []
2224
resources:
2325
accelerator: H100_40GB
24-
cpu: '1'
26+
cpu: "1"
2527
memory: 24Gi
2628
use_gpu: true
2729
secrets: {}
2830
system_packages: []
2931
trt_llm:
3032
build:
31-
base_model: qwen
33+
base_model: decoder
3234
checkpoint_repository:
3335
repo: deepseek-ai/DeepSeek-R1-Distill-Qwen-7B
3436
source: HF

falcon/falcon3-10B-trt-llm-spec-dec/config.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@ resources:
1818
use_gpu: true
1919
trt_llm:
2020
build:
21-
base_model: llama
21+
base_model: decoder
2222
checkpoint_repository:
2323
repo: tiiuae/Falcon3-10B-Instruct
2424
source: HF

falcon/falcon3-3B-trt-llm-engine-high-throughput/config.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,7 @@ resources:
2424
system_packages: []
2525
trt_llm:
2626
build:
27-
base_model: llama
27+
base_model: decoder
2828
checkpoint_repository:
2929
repo: tiiuae/Falcon3-3B-Instruct
3030
source: HF

internal/config.yaml

Lines changed: 4 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1,13 +1,12 @@
1-
21
model_metadata:
32
tags:
4-
- openai-compatible
3+
- openai-compatible
54
model_name: briton-spec-dec
65
python_version: py310
76
requirements: []
87
resources:
98
accelerator: A10G
10-
cpu: '1'
9+
cpu: "1"
1110
memory: 24Gi
1211
use_gpu: true
1312
runtime:
@@ -17,7 +16,7 @@ secrets:
1716
trt_llm:
1817
draft:
1918
build:
20-
base_model: deepseek
19+
base_model: decoder
2120
checkpoint_repository:
2221
repo: deepseek-ai/deepseek-coder-1.3b-instruct
2322
source: HF
@@ -32,7 +31,7 @@ trt_llm:
3231
num_draft_tokens: 4
3332
target:
3433
build:
35-
base_model: deepseek
34+
base_model: decoder
3635
checkpoint_repository:
3736
repo: deepseek-ai/deepseek-coder-1.3b-instruct
3837
source: HF

0 commit comments

Comments
 (0)