diff --git a/python/openai/tests/vllm_mistral_models/mistral-nemo-instruct-2407/1/model.json b/python/openai/tests/vllm_mistral_models/mistral-nemo-instruct-2407/1/model.json
index 835e537349..b7ce0ee199 100644
--- a/python/openai/tests/vllm_mistral_models/mistral-nemo-instruct-2407/1/model.json
+++ b/python/openai/tests/vllm_mistral_models/mistral-nemo-instruct-2407/1/model.json
@@ -1 +1 @@
-{"model": "mistralai/Mistral-Nemo-Instruct-2407", "disable_log_requests": true, "gpu_memory_utilization": 0.9}
\ No newline at end of file
+{"model": "mistralai/Mistral-Nemo-Instruct-2407", "gpu_memory_utilization": 0.9}
\ No newline at end of file
diff --git a/python/openai/tests/vllm_models/llama-3.1-8b-instruct/1/model.json b/python/openai/tests/vllm_models/llama-3.1-8b-instruct/1/model.json
index cb9b14c765..df85a05da0 100644
--- a/python/openai/tests/vllm_models/llama-3.1-8b-instruct/1/model.json
+++ b/python/openai/tests/vllm_models/llama-3.1-8b-instruct/1/model.json
@@ -1 +1 @@
-{"model": "meta-llama/Meta-Llama-3.1-8B-Instruct", "disable_log_requests": true, "gpu_memory_utilization": 0.9}
+{"model": "meta-llama/Meta-Llama-3.1-8B-Instruct", "gpu_memory_utilization": 0.9}
diff --git a/qa/L0_perf_vllm/test.sh b/qa/L0_perf_vllm/test.sh
index e1ce8cf2ed..2c9fc87cb4 100755
--- a/qa/L0_perf_vllm/test.sh
+++ b/qa/L0_perf_vllm/test.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -47,7 +47,6 @@ rm -rf $MODEL_REPO $EXPORT_FILE *.tjson *.json *.csv
 mkdir -p $MODEL_REPO/$MODEL_NAME/1
 echo '{
     "model":"gpt2",
-    "disable_log_requests": "true",
     "gpu_memory_utilization": 0.5
 }' >$MODEL_REPO/$MODEL_NAME/1/model.json