4444 "HF_MODEL_ID" : "TheBloke/Llama-2-7b-chat-fp16" ,
4545 "TENSOR_PARALLEL_DEGREE" : "1" ,
4646 "OPTION_DTYPE" : "bf16" ,
47+ "MODEL_LOADING_TIMEOUT" : "1800" ,
4748}
4849
4950mock_schema_builder = MagicMock ()
@@ -63,8 +64,13 @@ class TestDjlBuilder(unittest.TestCase):
6364 )
6465 @patch ("sagemaker.serve.builder.djl_builder._get_ram_usage_mb" , return_value = 1024 )
6566 @patch ("sagemaker.serve.builder.djl_builder._get_nb_instance" , return_value = "ml.g5.24xlarge" )
67+ @patch (
68+ "sagemaker.serve.builder.djl_builder._get_default_djl_configurations" ,
69+ return_value = (mock_default_configs , 128 ),
70+ )
6671 def test_build_deploy_for_djl_local_container (
6772 self ,
73+ mock_default_djl_config ,
6874 mock_get_nb_instance ,
6975 mock_get_ram_usage_mb ,
7076 mock_is_jumpstart_model ,
@@ -125,8 +131,13 @@ def test_build_deploy_for_djl_local_container(
125131 "sagemaker.serve.builder.djl_builder._concurrent_benchmark" ,
126132 side_effect = [(0.03 , 16 ), (0.10 , 4 ), (0.15 , 2 )],
127133 )
134+ @patch (
135+ "sagemaker.serve.builder.djl_builder._get_default_djl_configurations" ,
136+ return_value = (mock_default_configs , 128 ),
137+ )
128138 def test_tune_for_djl_local_container (
129139 self ,
140+ mock_default_djl_config ,
130141 mock_concurrent_benchmarks ,
131142 mock_serial_benchmarks ,
132143 mock_admissible_tensor_parallel_degrees ,
@@ -165,8 +176,10 @@ def test_tune_for_djl_local_container(
165176 "sagemaker.serve.builder.djl_builder._get_admissible_tensor_parallel_degrees" ,
166177 return_value = [4 ],
167178 )
179+ @patch ("sagemaker.serve.model_server.djl_serving.utils._get_available_gpus" , return_value = None )
168180 def test_tune_for_djl_local_container_deep_ping_ex (
169181 self ,
182+ mock_get_available_gpus ,
170183 mock_get_admissible_tensor_parallel_degrees ,
171184 mock_serial_benchmarks ,
172185 mock_get_nb_instance ,
@@ -204,8 +217,10 @@ def test_tune_for_djl_local_container_deep_ping_ex(
204217 "sagemaker.serve.builder.djl_builder._get_admissible_tensor_parallel_degrees" ,
205218 return_value = [4 ],
206219 )
220+ @patch ("sagemaker.serve.model_server.djl_serving.utils._get_available_gpus" , return_value = None )
207221 def test_tune_for_djl_local_container_load_ex (
208222 self ,
223+ mock_get_available_gpus ,
209224 mock_get_admissible_tensor_parallel_degrees ,
210225 mock_serial_benchmarks ,
211226 mock_get_nb_instance ,
@@ -245,8 +260,10 @@ def test_tune_for_djl_local_container_load_ex(
245260 "sagemaker.serve.builder.djl_builder._get_admissible_tensor_parallel_degrees" ,
246261 return_value = [4 ],
247262 )
263+ @patch ("sagemaker.serve.model_server.djl_serving.utils._get_available_gpus" , return_value = None )
248264 def test_tune_for_djl_local_container_oom_ex (
249265 self ,
266+ mock_get_available_gpus ,
250267 mock_get_admissible_tensor_parallel_degrees ,
251268 mock_serial_benchmarks ,
252269 mock_get_nb_instance ,
@@ -283,8 +300,10 @@ def test_tune_for_djl_local_container_oom_ex(
283300 "sagemaker.serve.builder.djl_builder._get_admissible_tensor_parallel_degrees" ,
284301 return_value = [4 ],
285302 )
303+ @patch ("sagemaker.serve.model_server.djl_serving.utils._get_available_gpus" , return_value = None )
286304 def test_tune_for_djl_local_container_invoke_ex (
287305 self ,
306+ mock_get_available_gpus ,
288307 mock_get_admissible_tensor_parallel_degrees ,
289308 mock_serial_benchmarks ,
290309 mock_get_nb_instance ,
0 commit comments