Skip to content

Commit f5273ee

Browse files
oandreeva-nvTabriziankrishung5
authored
[feat]:Custom Backend Tracing (#7403)
Co-authored-by: Iman Tabrizian <[email protected]> Co-authored-by: Kris Hung <[email protected]>
1 parent 02723f8 commit f5273ee

File tree

5 files changed

+273
-26
lines changed

5 files changed

+273
-26
lines changed

docs/user_guide/trace.md

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -623,6 +623,22 @@ Then, you can specify headers in the `infer` method. For references, please
623623
look at our [tests](https://github.com/triton-inference-server/server/blob/main/qa/L0_trace/opentelemetry_unittest.py),
624624
e.g. [http context propagation test](https://github.com/triton-inference-server/server/blob/main/qa/L0_trace/opentelemetry_unittest.py#L494-L508).
625625

626+
### Custom Backend Tracing
627+
628+
In the case when a custom activity needs to be traced in the backend, please
629+
use `TRITONSERVER_InferenceTraceReportActivity` API. For examples, please
630+
refer to the [identity backend](https://github.com/triton-inference-server/identity_backend/blob/main/src/identity.cc).
631+
632+
In `openTelemetry` trace mode, if one wishes to start a new span, make sure
633+
that the name of your custom activity ends with `_START`. To end the new span,
634+
make sure that corresponding activity ends with `_END`. For example, in the
635+
identity backend, we start a `CUSTOM_ACTIVITY` span, by [reporting](https://github.com/triton-inference-server/identity_backend/blob/oandreeva-custom-trace-activity/src/identity.cc#L872-L876)
636+
`CUSTOM_ACTIVITY_START` event; and we close this span by [reporting](https://github.com/triton-inference-server/identity_backend/blob/oandreeva-custom-trace-activity/src/identity.cc#L880-L883)
637+
`CUSTOM_ACTIVITY_END` event.
638+
639+
Please note, that it is user's responsibility to make sure that all custom started
640+
spans are properly ended.
641+
626642
### Limitations
627643

628644
- OpenTelemetry trace mode is not supported on Windows systems.

qa/L0_trace/opentelemetry_unittest.py

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,12 +115,14 @@ def setUp(self):
115115
self.bls_model_name = "bls_simple"
116116
self.trace_context_model = "trace_context"
117117
self.non_decoupled_model_name_ = "repeat_int32"
118+
self.identity_model = "custom_identity_int32"
118119
self.test_models = [
119120
self.simple_model_name,
120121
self.ensemble_model_name,
121122
self.bls_model_name,
122123
self.non_decoupled_model_name_,
123124
self.cancel_queue_model_name,
125+
self.identity_model,
124126
]
125127
self.root_span = "InferRequest"
126128
self._user_data = UserData()
@@ -219,6 +221,7 @@ def _check_events(self, span_name, events, is_cancelled):
219221
self.assertFalse(
220222
all(entry in events for entry in root_events_http + root_events_grpc)
221223
)
224+
self.assertEquals(len(events), len(compute_events))
222225

223226
elif span_name == self.root_span:
224227
# Check that root span has INFER_RESPONSE_COMPLETE, _RECV/_WAITREAD
@@ -230,23 +233,52 @@ def _check_events(self, span_name, events, is_cancelled):
230233
if "HTTP" in events:
231234
self.assertTrue(all(entry in events for entry in root_events_http))
232235
self.assertFalse(all(entry in events for entry in root_events_grpc))
236+
self.assertEquals(len(events), len(root_events_http))
233237

234238
elif "GRPC" in events:
235239
self.assertTrue(all(entry in events for entry in root_events_grpc))
236240
self.assertFalse(all(entry in events for entry in root_events_http))
241+
self.assertEquals(len(events), len(root_events_grpc))
237242

238243
if is_cancelled == False:
239244
self.assertFalse(all(entry in events for entry in request_events))
240245
self.assertFalse(all(entry in events for entry in compute_events))
241246

242247
elif span_name in self.test_models:
248+
if span_name == self.identity_model:
249+
request_events.append("CUSTOM_SINGLE_ACTIVITY")
243250
# Check that all request related events (and only them)
244251
# are recorded in request span
245252
self.assertTrue(all(entry in events for entry in request_events))
246253
self.assertFalse(
247254
all(entry in events for entry in root_events_http + root_events_grpc)
248255
)
249256
self.assertFalse(all(entry in events for entry in compute_events))
257+
self.assertEquals(len(events), len(request_events))
258+
259+
elif span_name.startswith("CUSTOM_ACTIVITY"):
260+
custom_activity_events = []
261+
if len(span_name) > len("CUSTOM_ACTIVITY"):
262+
custom_activity_events.append(str(span_name + "_START"))
263+
custom_activity_events.append(str(span_name + "_END"))
264+
# Check `custom_identity_int32` config file,
265+
# parameter `single_activity_frequency` identifies
266+
# which custom spans contain "CUSTOM_SINGLE_ACTIVITY" event
267+
if int(span_name[-1]) % 3 == 0:
268+
custom_activity_events.append("CUSTOM_SINGLE_ACTIVITY")
269+
else:
270+
custom_activity_events = [
271+
"CUSTOM_ACTIVITY_START",
272+
"CUSTOM_ACTIVITY_END",
273+
]
274+
275+
self.assertTrue(
276+
all(entry in events for entry in custom_activity_events),
277+
"Span " + span_name,
278+
)
279+
self.assertEquals(
280+
len(events), len(custom_activity_events), "Span " + span_name
281+
)
250282

251283
def _test_resource_attributes(self, attributes):
252284
"""
@@ -487,6 +519,52 @@ def _test_simple_trace(self, headers=None):
487519
expected_parent_span_dict=expected_parent_span_dict,
488520
)
489521

522+
def _test_custom_identity_trace(self, headers=None):
523+
"""
524+
Helper function, that specifies expected parameters to evaluate trace,
525+
collected from running 1 inference request for `custom_identity_int32`
526+
model.
527+
Number of custom spans defined by the identity backend.
528+
`CUSTOM_ACTIVITY` span will always be there,
529+
`CUSTOM_ACTIVITY<N>` defined by `config.pbtxt parameters`.
530+
"""
531+
expected_number_of_spans = 10
532+
expected_counts = dict(
533+
{
534+
"compute": 1,
535+
self.identity_model: 1,
536+
self.root_span: 1,
537+
"CUSTOM_ACTIVITY": 1,
538+
"CUSTOM_ACTIVITY0": 1,
539+
"CUSTOM_ACTIVITY1": 1,
540+
"CUSTOM_ACTIVITY2": 1,
541+
"CUSTOM_ACTIVITY3": 1,
542+
"CUSTOM_ACTIVITY4": 1,
543+
"CUSTOM_ACTIVITY5": 1,
544+
}
545+
)
546+
expected_parent_span_dict = dict(
547+
{
548+
"InferRequest": ["custom_identity_int32"],
549+
"custom_identity_int32": [
550+
"CUSTOM_ACTIVITY",
551+
"CUSTOM_ACTIVITY0",
552+
"compute",
553+
],
554+
"CUSTOM_ACTIVITY0": ["CUSTOM_ACTIVITY1"],
555+
"CUSTOM_ACTIVITY1": ["CUSTOM_ACTIVITY2"],
556+
"CUSTOM_ACTIVITY2": ["CUSTOM_ACTIVITY3"],
557+
"CUSTOM_ACTIVITY3": ["CUSTOM_ACTIVITY4"],
558+
"CUSTOM_ACTIVITY4": ["CUSTOM_ACTIVITY5"],
559+
}
560+
)
561+
self._test_trace(
562+
headers=headers,
563+
expected_number_of_spans=expected_number_of_spans,
564+
expected_counts=expected_counts,
565+
expected_parent_span_dict=expected_parent_span_dict,
566+
)
567+
490568
def _test_non_decoupled_trace(self, headers=None):
491569
"""
492570
Helper function, that collects trace for non decoupled model and verifies it.
@@ -944,6 +1022,33 @@ def test_trace_context_exposed_to_pbe(self):
9441022
context_pattern = re.compile(r"\d{2}-[0-9a-f]{32}-[0-9a-f]{16}-\d{2}")
9451023
self.assertIsNotNone(re.match(context_pattern, context["traceparent"]))
9461024

1025+
def test_custom_backend_tracing(self):
1026+
"""
1027+
Tests custom activities reported from identity backend.
1028+
"""
1029+
input0_ = np.array([[4]], dtype=np.int32)
1030+
with httpclient.InferenceServerClient("localhost:8000", verbose=True) as client:
1031+
inputs = []
1032+
inputs.append(httpclient.InferInput("INPUT0", [1, 1], "INT32"))
1033+
inputs[0].set_data_from_numpy(input0_)
1034+
client.infer(self.identity_model, inputs=inputs)
1035+
self._test_custom_identity_trace()
1036+
1037+
def test_custom_backend_tracing_context_propagation(self):
1038+
"""
1039+
Tests custom activities reported from identity backend.
1040+
"""
1041+
input0_ = np.array([[4]], dtype=np.int32)
1042+
with httpclient.InferenceServerClient("localhost:8000", verbose=True) as client:
1043+
inputs = []
1044+
inputs.append(httpclient.InferInput("INPUT0", [1, 1], "INT32"))
1045+
inputs[0].set_data_from_numpy(input0_)
1046+
client.infer(
1047+
self.identity_model, inputs=inputs, headers=self.client_headers
1048+
)
1049+
1050+
self._test_custom_identity_trace(headers=self.client_headers)
1051+
9471052

9481053
if __name__ == "__main__":
9491054
unittest.main()

qa/L0_trace/test.sh

Lines changed: 76 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,16 @@ cp -r $DATADIR/$MODELBASE $MODELSDIR/simple && \
9797
cp -r ../L0_decoupled/models/repeat_int32 $MODELSDIR
9898
sed -i "s/decoupled: True/decoupled: False/" $MODELSDIR/repeat_int32/config.pbtxt
9999

100+
# set up identity model
101+
mkdir -p $MODELSDIR/custom_identity_int32/1 && (cd $MODELSDIR/custom_identity_int32 && \
102+
echo 'name: "custom_identity_int32"' >> config.pbtxt && \
103+
echo 'backend: "identity"' >> config.pbtxt && \
104+
echo 'max_batch_size: 1024' >> config.pbtxt && \
105+
echo -e 'input [{ name: "INPUT0" \n data_type: TYPE_INT32 \n dims: [ -1 ] }]' >> config.pbtxt && \
106+
echo -e 'output [{ name: "OUTPUT0" \n data_type: TYPE_INT32 \n dims: [ -1 ] }]' >> config.pbtxt && \
107+
echo 'instance_group [{ kind: KIND_CPU }]' >> config.pbtxt && \
108+
echo -e 'parameters [{ key: "execute_delay_ms" \n value: { string_value: "500" } }, { key: "enable_custom_tracing" \n value: { string_value: "true" } }]' >> config.pbtxt)
109+
100110
RET=0
101111

102112
# Helpers =======================================
@@ -742,6 +752,60 @@ wait $SERVER_PID
742752

743753
set +e
744754

755+
# Custom backend tracing
756+
SERVER_ARGS="--model-control-mode=explicit --model-repository=$MODELSDIR
757+
--load-model=custom_identity_int32 --trace-config=level=TIMESTAMPS \
758+
--trace-config=triton,file=custom_tracing_triton.log \
759+
--trace-config=rate=1 --trace-config=mode=triton"
760+
SERVER_LOG="./custom_backend_tracing.log"
761+
run_server
762+
if [ "$SERVER_PID" == "0" ]; then
763+
echo -e "\n***\n*** Failed to start $SERVER\n***"
764+
cat $SERVER_LOG
765+
exit 1
766+
fi
767+
768+
# Send 1 inference request, should expect 3 custom activities:
769+
# CUSTOM_SINGLE_ACTIVITY, CUSTOM_ACTIVITY_START, CUSTOM_ACTIVITY_END
770+
rm -f ./curl.out
771+
data='{"inputs":[{"name":"INPUT0","datatype":"INT32","shape":[1,1],"data":[4]}]}'
772+
set +e
773+
code=`curl -s -w %{http_code} -o ./curl.out -X POST localhost:8000/v2/models/custom_identity_int32/infer -d ${data}`
774+
set -e
775+
if [ "$code" != "200" ]; then
776+
cat ./curl.out
777+
echo -e "\n***\n*** Test Failed\n***"
778+
RET=1
779+
fi
780+
781+
set -e
782+
783+
kill $SERVER_PID
784+
wait $SERVER_PID
785+
786+
set +e
787+
788+
789+
$TRACE_SUMMARY -t custom_tracing_triton.log > summary_custom_tracing_triton.log
790+
791+
if [ `grep -c "CUSTOM_SINGLE_ACTIVITY" summary_custom_tracing_triton.log` != "1" ]; then
792+
cat summary_custom_tracing_triton.log
793+
echo -e "\n***\n*** Test Failed: Unexpected number of traced "CUSTOM_ACTIVITY" events.\n***"
794+
RET=1
795+
fi
796+
797+
if [ `grep -c "CUSTOM_ACTIVITY_START" summary_custom_tracing_triton.log` != "1" ]; then
798+
cat summary_custom_tracing_triton.log
799+
echo -e "\n***\n*** Test Failed: Unexpected number of traced "CUSTOM_ACTIVITY_START" events.\n***"
800+
RET=1
801+
fi
802+
803+
if [ `grep -c "CUSTOM_ACTIVITY_END" summary_custom_tracing_triton.log` != "1" ]; then
804+
cat summary_custom_tracing_triton.log
805+
echo -e "\n***\n*** Test Failed: Unexpected number of traced "CUSTOM_ACTIVITY_END" events.\n***"
806+
RET=1
807+
fi
808+
745809
# Check opentelemetry trace exporter sends proper info.
746810
# A helper python script starts listening on $OTLP_PORT, where
747811
# OTLP exporter sends traces.
@@ -758,7 +822,7 @@ rm collected_traces.json*
758822
# Unittests then check that produced spans have expected format and events
759823
OPENTELEMETRY_TEST=opentelemetry_unittest.py
760824
OPENTELEMETRY_LOG="opentelemetry_unittest.log"
761-
EXPECTED_NUM_TESTS="17"
825+
EXPECTED_NUM_TESTS="19"
762826

763827
# Set up repo and args for SageMaker
764828
export SAGEMAKER_TRITON_DEFAULT_MODEL_NAME="simple"
@@ -772,10 +836,20 @@ cp -r $DATADIR/$MODELBASE/* ${MODEL_PATH} && \
772836
# Add model to test trace context exposed to python backend
773837
mkdir -p $MODELSDIR/trace_context/1 && cp ./trace_context.py $MODELSDIR/trace_context/1/model.py
774838

839+
# set up identity model
840+
rm -r ${MODELSDIR}/custom_identity_int32
841+
mkdir -p $MODELSDIR/custom_identity_int32/1 && (cd $MODELSDIR/custom_identity_int32 && \
842+
echo 'name: "custom_identity_int32"' >> config.pbtxt && \
843+
echo 'backend: "identity"' >> config.pbtxt && \
844+
echo 'max_batch_size: 1024' >> config.pbtxt && \
845+
echo -e 'input [{ name: "INPUT0" \n data_type: TYPE_INT32 \n dims: [ -1 ] }]' >> config.pbtxt && \
846+
echo -e 'output [{ name: "OUTPUT0" \n data_type: TYPE_INT32 \n dims: [ -1 ] }]' >> config.pbtxt && \
847+
echo 'instance_group [{ kind: KIND_CPU }]' >> config.pbtxt && \
848+
echo -e 'parameters [{ key: "execute_delay_ms" \n value: { string_value: "500" } }, { key: "enable_custom_tracing" \n value: { string_value: "true" } }, { key: "nested_span_count" \n value: { string_value: "6" } }, { key: "single_activity_frequency" \n value: { string_value: "3" } }]' >> config.pbtxt)
775849

776850
SERVER_ARGS="--allow-sagemaker=true --model-control-mode=explicit \
777851
--load-model=simple --load-model=ensemble_add_sub_int32_int32_int32 \
778-
--load-model=repeat_int32 \
852+
--load-model=repeat_int32 --load-model=custom_identity_int32\
779853
--load-model=input_all_required \
780854
--load-model=dynamic_batch \
781855
--load-model=bls_simple --trace-config=level=TIMESTAMPS \
@@ -1164,5 +1238,4 @@ set -e
11641238
kill $SERVER_PID
11651239
wait $SERVER_PID
11661240
set +e
1167-
11681241
exit $RET

0 commit comments

Comments
 (0)