File tree (2 files changed, +3 -3 lines changed):
L0_backend_vllm/metrics_test
L0_multi_gpu_vllm/multi_lora
2 files changed, +3 -3 lines changed
Original file line number | Diff line number | Diff line change
@@ -48,7 +48,7 @@ def setUp(self):
4848 "The capital of France is" ,
4949 "The future of AI is" ,
5050 ]
51- self .sampling_parameters = {"temperature" : 0 , "top_p" : 1 }
51+ self .sampling_parameters = {"temperature" : "0" , "top_p" : "1" }
5252
5353 def parse_vllm_metrics (self ):
5454 """
Original file line number Diff line number Diff line change 11#! /bin/bash
2- # Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+ # Copyright 2024-2025 , NVIDIA CORPORATION & AFFILIATES. All rights reserved.
33#
44# Redistribution and use in source and binary forms, with or without
55# modification, are permitted provided that the following conditions
@@ -112,7 +112,7 @@ model_json=$(cat <<EOF
112112 "model":"./weights/backbone/gemma-2b",
113113 "disable_log_requests": true,
114114 "gpu_memory_utilization": 0.7,
115- "tensor_parallel_size": 1 ,
115+ "tensor_parallel_size": 2 ,
116116 "block_size": 16,
117117 "enforce_eager": true,
118118 "enable_lora": true,
You can’t perform that action at this time.
0 commit comments