2020 _construct_url ,
2121)
2222from sagemaker .serve .utils .exceptions import ModelBuilderException , LocalModelOutOfMemoryException
23+ from sagemaker .user_agent import SDK_VERSION
2324
2425MOCK_SESSION = Mock ()
2526MOCK_FUNC_NAME = "Mock.deploy"
3233)
3334MOCK_HUGGINGFACE_ID = "meta-llama/Llama-2-7b-hf"
3435MOCK_EXCEPTION = LocalModelOutOfMemoryException ("mock raise ex" )
36+ MOCK_ENDPOINT_ARN = "arn:aws:sagemaker:us-west-2:123456789012:endpoint/test"
3537
3638
3739class ModelBuilderMock :
@@ -72,15 +74,22 @@ def test_capture_telemetry_decorator_djl_success(self, mock_send_telemetry):
7274 mock_model_builder .model = MOCK_HUGGINGFACE_ID
7375 mock_model_builder .mode = Mode .LOCAL_CONTAINER
7476 mock_model_builder .model_server = ModelServer .DJL_SERVING
77+ mock_model_builder .sagemaker_session .endpoint_arn = MOCK_ENDPOINT_ARN
7578
7679 mock_model_builder .mock_deploy ()
7780
81+ args = mock_send_telemetry .call_args .args
82+ latency = str (args [5 ]).split ("latency=" )[1 ]
7883 expected_extra_str = (
7984 f"{ MOCK_FUNC_NAME } "
8085 "&x-modelServer=4"
8186 "&x-imageTag=djl-inference:0.25.0-deepspeed0.11.0-cu118"
87+ f"&x-sdkVersion={ SDK_VERSION } "
8288 f"&x-modelName={ MOCK_HUGGINGFACE_ID } "
89+ f"&x-endpointArn={ MOCK_ENDPOINT_ARN } "
90+ f"&x-latency={ latency } "
8391 )
92+
8493 mock_send_telemetry .assert_called_once_with (
8594 "1" , 2 , MOCK_SESSION , None , None , expected_extra_str
8695 )
@@ -93,15 +102,22 @@ def test_capture_telemetry_decorator_tgi_success(self, mock_send_telemetry):
93102 mock_model_builder .model = MOCK_HUGGINGFACE_ID
94103 mock_model_builder .mode = Mode .LOCAL_CONTAINER
95104 mock_model_builder .model_server = ModelServer .TGI
105+ mock_model_builder .sagemaker_session .endpoint_arn = MOCK_ENDPOINT_ARN
96106
97107 mock_model_builder .mock_deploy ()
98108
109+ args = mock_send_telemetry .call_args .args
110+ latency = str (args [5 ]).split ("latency=" )[1 ]
99111 expected_extra_str = (
100112 f"{ MOCK_FUNC_NAME } "
101113 "&x-modelServer=6"
102114 "&x-imageTag=huggingface-pytorch-inference:2.0.0-transformers4.28.1-cpu-py310-ubuntu20.04"
115+ f"&x-sdkVersion={ SDK_VERSION } "
103116 f"&x-modelName={ MOCK_HUGGINGFACE_ID } "
117+ f"&x-endpointArn={ MOCK_ENDPOINT_ARN } "
118+ f"&x-latency={ latency } "
104119 )
120+
105121 mock_send_telemetry .assert_called_once_with (
106122 "1" , 2 , MOCK_SESSION , None , None , expected_extra_str
107123 )
@@ -126,6 +142,7 @@ def test_capture_telemetry_decorator_handle_exception_success(self, mock_send_te
126142 mock_model_builder .model = MOCK_HUGGINGFACE_ID
127143 mock_model_builder .mode = Mode .LOCAL_CONTAINER
128144 mock_model_builder .model_server = ModelServer .DJL_SERVING
145+ mock_model_builder .sagemaker_session .endpoint_arn = MOCK_ENDPOINT_ARN
129146
130147 mock_exception = Mock ()
131148 mock_exception_obj = MOCK_EXCEPTION
@@ -134,12 +151,18 @@ def test_capture_telemetry_decorator_handle_exception_success(self, mock_send_te
134151 with self .assertRaises (ModelBuilderException ) as _ :
135152 mock_model_builder .mock_deploy (mock_exception )
136153
154+ args = mock_send_telemetry .call_args .args
155+ latency = str (args [5 ]).split ("latency=" )[1 ]
137156 expected_extra_str = (
138157 f"{ MOCK_FUNC_NAME } "
139158 "&x-modelServer=4"
140159 "&x-imageTag=djl-inference:0.25.0-deepspeed0.11.0-cu118"
160+ f"&x-sdkVersion={ SDK_VERSION } "
141161 f"&x-modelName={ MOCK_HUGGINGFACE_ID } "
162+ f"&x-endpointArn={ MOCK_ENDPOINT_ARN } "
163+ f"&x-latency={ latency } "
142164 )
165+
143166 mock_send_telemetry .assert_called_once_with (
144167 "0" ,
145168 2 ,
0 commit comments