
Commit b01d7d7

fix: Remove deprecated config name (#95)

1 parent ed9ff01

File tree: 4 files changed (+3, −10 lines)

ci/L0_backend_vllm/vllm_backend/test.sh

Lines changed: 2 additions & 2 deletions

@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -64,7 +64,7 @@ wget -P models/add_sub https://raw.githubusercontent.com/triton-inference-server
 
 # Invalid model attribute
 cp -r ${SAMPLE_MODELS_REPO}/vllm_model models/vllm_invalid_1/
-sed -i 's/"disable_log_requests"/"invalid_attribute"/' models/vllm_invalid_1/1/model.json
+sed -i 's/"enforce_eager"/"invalid_attribute"/' models/vllm_invalid_1/1/model.json
 
 # Invalid model name
 cp -r ${SAMPLE_MODELS_REPO}/vllm_model models/vllm_invalid_2/
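
The invalid-attribute test used to patch out `disable_log_requests`; since that key no longer appears in the sample config, the substitution target moves to `enforce_eager`. A minimal sketch of the idea, assuming the same `models/vllm_invalid_1` layout the test script creates:

```bash
# Sketch only: replace a known-good key with an unrecognized one so the
# server is expected to refuse to load the model.
sed -i 's/"enforce_eager"/"invalid_attribute"/' models/vllm_invalid_1/1/model.json

# The substituted key should now be present; vLLM should reject it at
# model load time, which is what the test asserts.
grep '"invalid_attribute"' models/vllm_invalid_1/1/model.json
```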

ci/L0_multi_gpu_vllm/multi_lora/test.sh

Lines changed: 0 additions & 4 deletions

@@ -110,7 +110,6 @@ export SERVER_ENABLE_LORA=true
 model_json=$(cat <<EOF
 {
 "model":"./weights/backbone/gemma-2b",
-"disable_log_requests": true,
 "gpu_memory_utilization": 0.7,
 "tensor_parallel_size": 2,
 "block_size": 16,
@@ -202,7 +201,6 @@ wait $SERVER_PID
 model_json=$(cat <<EOF
 {
 "model":"./weights/backbone/gemma-2b",
-"disable_log_requests": true,
 "gpu_memory_utilization": 0.7,
 "tensor_parallel_size": 2,
 "block_size": 16,
@@ -282,7 +280,6 @@ export SERVER_ENABLE_LORA=false
 model_json=$(cat <<EOF
 {
 "model":"./weights/backbone/gemma-2b",
-"disable_log_requests": true,
 "gpu_memory_utilization": 0.8,
 "tensor_parallel_size": 2,
 "block_size": 16,
@@ -344,7 +341,6 @@ export SERVER_ENABLE_LORA=false
 model_json=$(cat <<EOF
 {
 "model":"./weights/backbone/gemma-2b",
-"disable_log_requests": true,
 "gpu_memory_utilization": 0.8,
 "tensor_parallel_size": 2,
 "block_size": 16,

docs/llama_multi_lora_tutorial.md

Lines changed: 1 addition & 3 deletions

@@ -1,5 +1,5 @@
 <!--
-# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -146,7 +146,6 @@ For this tutorial we will use the following set of parameters, specified in the
 ```json
 {
 "model":"/vllm_workspace/weights/backbone/llama-7b-hf",
-"disable_log_requests": "true",
 "gpu_memory_utilization": 0.8,
 "tensor_parallel_size": 2,
 "block_size": 16,
@@ -157,7 +156,6 @@ For this tutorial we will use the following set of parameters, specified in the
 ```
 
 + `model`: The path to your model repository
-+ `disable_log_requests`: To show logs when launch vllm or not.
 + `gpu_memory_utilization`: The gpu memory allocated for the model weights and vllm *PagedAttention* kv cache manager.
 + `tensor_parallel_size`: The vllm now support the tensor paralism, so you can decide how many gpus you want to use for serving.
 + `block_size`: vLLM kv cache block size.
Lines changed: 0 additions & 1 deletion

@@ -1,6 +1,5 @@
 {
 "model":"facebook/opt-125m",
-"disable_log_requests": true,
 "gpu_memory_utilization": 0.5,
 "enforce_eager": true
 }
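
With all four files updated, a quick repo-wide check can confirm nothing still references the removed option. A minimal sketch, assuming it is run from the repository root:

```bash
# Sketch only: fail if any shell, JSON, or markdown file still mentions the
# removed vLLM option after this commit.
if grep -rn '"disable_log_requests"' . --include='*.sh' --include='*.json' --include='*.md'; then
    echo "deprecated option still referenced" >&2
    exit 1
fi
```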
