@@ -335,13 +335,23 @@ TraceManager::SampleTrace(const TraceStartOptions& start_options)
335335 std::chrono::duration_cast<std::chrono::nanoseconds>(
336336 std::chrono::steady_clock::now ().time_since_epoch ())
337337 .count ();
338- ts->otel_context_ = start_options.propagated_context ;
339- opentelemetry::nostd::shared_ptr<otel_trace_api::Span> root_span;
340- root_span = ts->StartSpan (
341- " InferRequest" , steady_timestamp_ns, otel_trace_api::kSpanKey );
338+ if (ts->span_stacks_ .find (ts->trace_id_ ) == ts->span_stacks_ .end ()) {
339+ std::unique_ptr<
340+ std::stack<opentelemetry::nostd::shared_ptr<otel_trace_api::Span>>>
341+ st (new std::stack<
342+ opentelemetry::nostd::shared_ptr<otel_trace_api::Span>>());
343+ ts->span_stacks_ .emplace (ts->trace_id_ , std::move (st));
344+ }
345+ auto active_span =
346+ otel_trace_api::GetSpan (start_options.propagated_context );
347+ if (active_span->GetContext ().IsValid ()) {
348+ ts->span_stacks_ [ts->trace_id_ ]->emplace (active_span);
349+ }
342350 // Storing "InferRequest" span as a root span
343351 // to keep it alive for the duration of the request.
344- ts->otel_context_ = ts->otel_context_ .SetValue (kRootSpan , root_span);
352+ ts->root_span_ =
353+ ts->StartSpan (" InferRequest" , steady_timestamp_ns, ts->trace_id_ );
354+ ts->span_stacks_ [ts->trace_id_ ]->emplace (ts->root_span_ );
345355#else
346356 LOG_ERROR << " Unsupported trace mode: "
347357 << TraceManager::InferenceTraceModeString (ts->setting_ ->mode_ );
@@ -358,7 +368,7 @@ TraceManager::Trace::~Trace()
358368 setting_->WriteTrace (streams_);
359369 } else if (setting_->mode_ == TRACE_MODE_OPENTELEMETRY) {
360370#ifndef _WIN32
361- EndSpan (kRootSpan );
371+ EndSpan (trace_id_ );
362372#else
363373 LOG_ERROR << " Unsupported trace mode: "
364374 << TraceManager::InferenceTraceModeString (setting_->mode_ );
@@ -390,7 +400,8 @@ TraceManager::Trace::CaptureTimestamp(
390400 << " {\" name\" :\" " << name << " \" ,\" ns\" :" << timestamp_ns << " }]}" ;
391401 } else if (setting_->mode_ == TRACE_MODE_OPENTELEMETRY) {
392402#ifndef _WIN32
393- AddEvent (kRootSpan , name, timestamp_ns);
403+ root_span_->AddEvent (
404+ name, time_offset_ + std::chrono::nanoseconds{timestamp_ns});
394405#else
395406 LOG_ERROR << " Unsupported trace mode: "
396407 << TraceManager::InferenceTraceModeString (setting_->mode_ );
@@ -501,15 +512,15 @@ TraceManager::ProcessOpenTelemetryParameters(
501512
502513void
503514TraceManager::Trace::StartSpan (
504- std::string span_key, TRITONSERVER_InferenceTrace* trace,
515+ TRITONSERVER_InferenceTrace* trace,
505516 TRITONSERVER_InferenceTraceActivity activity, uint64_t timestamp_ns,
506517 uint64_t trace_id)
507518{
508519 uint64_t parent_id;
509520 LOG_TRITONSERVER_ERROR (
510521 TRITONSERVER_InferenceTraceParentId (trace, &parent_id),
511522 " getting trace parent id" );
512- std::string parent_span_key = " " ;
523+ auto span_parent_id = parent_id ;
513524
514525 // Currently, only 2 types of sub-spans are supported:
515526 // request span and compute span. Compute span is a leaf span
@@ -521,16 +532,9 @@ TraceManager::Trace::StartSpan(
521532 // If parent_id > 0, then this is a child trace, spawned from
522533 // the ensemble's main request. For this instance, the parent
523534 // span is the ensemble's request span.
524- if (parent_id == 0 && activity == TRITONSERVER_TRACE_REQUEST_START) {
525- parent_span_key = kRootSpan ;
526- } else if (activity == TRITONSERVER_TRACE_REQUEST_START) {
527- // [FIXME] For BLS requests parent span for children's request spans
528- // should be parent model's compute span. Currently,
529- // this won't work, since parent's compute span will be created
530- // only after children's spans are created.
531- parent_span_key = kRequestSpan + std::to_string (parent_id);
532- } else if (activity == TRITONSERVER_TRACE_COMPUTE_START) {
533- parent_span_key = kRequestSpan + std::to_string (trace_id);
535+ if ((parent_id == 0 && activity == TRITONSERVER_TRACE_REQUEST_START) ||
536+ (activity == TRITONSERVER_TRACE_COMPUTE_START)) {
537+ span_parent_id = trace_id;
534538 }
535539
536540 std::string display_name = " compute" ;
@@ -542,7 +546,7 @@ TraceManager::Trace::StartSpan(
542546 display_name = model_name;
543547 }
544548
545- auto span = StartSpan (display_name, timestamp_ns, parent_span_key );
549+ auto span = StartSpan (display_name, timestamp_ns, span_parent_id );
546550
547551 if (activity == TRITONSERVER_TRACE_REQUEST_START) {
548552 int64_t model_version;
@@ -564,14 +568,13 @@ TraceManager::Trace::StartSpan(
564568 PrepareTraceContext (span, &buffer);
565569 TRITONSERVER_InferenceTraceSetContext (trace, buffer.Contents ().c_str ());
566570 }
567-
568- otel_context_ = otel_context_.SetValue (span_key, span);
571+ span_stacks_[trace_id]->emplace (span);
569572}
570573
571574opentelemetry::nostd::shared_ptr<otel_trace_api::Span>
572575TraceManager::Trace::StartSpan (
573576 std::string display_name, const uint64_t & raw_timestamp_ns,
574- std::string parent_span_key )
577+ uint64_t trace_id )
575578{
576579 otel_trace_api::StartSpanOptions options;
577580 options.kind = otel_trace_api::SpanKind::kServer ;
@@ -580,45 +583,37 @@ TraceManager::Trace::StartSpan(
580583 options.start_steady_time =
581584 otel_common::SteadyTimestamp{std::chrono::nanoseconds{raw_timestamp_ns}};
582585
583- // If the new span is a child span, we need to retrieve its parent from
584- // the context and provide it through StartSpanOptions to the child span
585- if (!parent_span_key.empty () && otel_context_.HasKey (parent_span_key)) {
586- auto parent_span = opentelemetry::nostd::get<
587- opentelemetry::nostd::shared_ptr<otel_trace_api::Span>>(
588- otel_context_.GetValue (parent_span_key));
589- options.parent = parent_span->GetContext ();
586+ // If the new span is a child span, we need to retrieve its parent and
587+ // provide it through StartSpanOptions to the child span
588+ if (span_stacks_.find (trace_id) != span_stacks_.end () &&
589+ !span_stacks_[trace_id]->empty ()) {
590+ options.parent = span_stacks_[trace_id]->top ()->GetContext ();
590591 }
591592 auto provider = opentelemetry::trace::Provider::GetTracerProvider ();
592593 return provider->GetTracer (kTritonTracer )->StartSpan (display_name, options);
593594}
594595
595596void
596- TraceManager::Trace::EndSpan (std::string span_key )
597+ TraceManager::Trace::EndSpan (uint64_t trace_id )
597598{
598599 auto timestamp_ns = std::chrono::duration_cast<std::chrono::nanoseconds>(
599600 std::chrono::steady_clock::now ().time_since_epoch ())
600601 .count ();
601- EndSpan (span_key, timestamp_ns );
602+ EndSpan (timestamp_ns, trace_id );
602603}
603604
604605
605606void
606607TraceManager::Trace::EndSpan (
607- std::string span_key, const uint64_t & raw_timestamp_ns)
608+ const uint64_t & raw_timestamp_ns, uint64_t trace_id )
608609{
609- if (otel_context_.HasKey (span_key)) {
610- auto span = opentelemetry::nostd::get<
611- opentelemetry::nostd::shared_ptr<otel_trace_api::Span>>(
612- otel_context_.GetValue (span_key));
613-
614- if (span == nullptr ) {
615- return ;
616- }
617-
610+ if (span_stacks_.find (trace_id) != span_stacks_.end () &&
611+ !span_stacks_[trace_id]->empty ()) {
618612 otel_trace_api::EndSpanOptions end_options;
619613 end_options.end_steady_time = otel_common::SteadyTimestamp{
620614 std::chrono::nanoseconds{raw_timestamp_ns}};
621- span->End (end_options);
615+ span_stacks_[trace_id]->top ()->End (end_options);
616+ span_stacks_[trace_id]->pop ();
622617 }
623618}
624619
@@ -630,79 +625,46 @@ TraceManager::Trace::ReportToOpenTelemetry(
630625 uint64_t id;
631626 LOG_TRITONSERVER_ERROR (
632627 TRITONSERVER_InferenceTraceId (trace, &id), " getting trace id" );
633-
634- auto current_span_key = GetSpanKeyForActivity (activity, id);
635- if (current_span_key.empty ()) {
636- return ;
628+ if (span_stacks_.find (id) == span_stacks_.end ()) {
629+ std::unique_ptr<
630+ std::stack<opentelemetry::nostd::shared_ptr<otel_trace_api::Span>>>
631+ st (new std::stack<
632+ opentelemetry::nostd::shared_ptr<otel_trace_api::Span>>());
633+ span_stacks_.emplace (id, std::move (st));
637634 }
638635
639- AddEvent (current_span_key, trace, activity, timestamp_ns, id);
640- }
641-
642- std::string
643- TraceManager::Trace::GetSpanKeyForActivity (
644- TRITONSERVER_InferenceTraceActivity activity, uint64_t trace_id)
645- {
646- std::string span_name;
647- switch (activity) {
648- case TRITONSERVER_TRACE_REQUEST_START:
649- case TRITONSERVER_TRACE_QUEUE_START:
650- case TRITONSERVER_TRACE_REQUEST_END: {
651- span_name = kRequestSpan + std::to_string (trace_id);
652- break ;
653- }
654-
655- case TRITONSERVER_TRACE_COMPUTE_START:
656- case TRITONSERVER_TRACE_COMPUTE_INPUT_END:
657- case TRITONSERVER_TRACE_COMPUTE_OUTPUT_START:
658- case TRITONSERVER_TRACE_COMPUTE_END: {
659- span_name = kComputeSpan + std::to_string (trace_id);
660- break ;
661- }
662- case TRITONSERVER_TRACE_TENSOR_QUEUE_INPUT:
663- case TRITONSERVER_TRACE_TENSOR_BACKEND_INPUT:
664- case TRITONSERVER_TRACE_TENSOR_BACKEND_OUTPUT:
665- default : {
666- LOG_ERROR << " Unsupported activity: "
667- << TRITONSERVER_InferenceTraceActivityString (activity);
668- span_name = " " ;
669- break ;
670- }
671- }
672-
673- return span_name;
636+ AddEvent (trace, activity, timestamp_ns, id);
674637}
675638
676639void
677640TraceManager::Trace::AddEvent (
678- std::string span_key, TRITONSERVER_InferenceTrace* trace,
641+ TRITONSERVER_InferenceTrace* trace,
679642 TRITONSERVER_InferenceTraceActivity activity, uint64_t timestamp_ns,
680- uint64_t id )
643+ uint64_t trace_id )
681644{
682645 if (activity == TRITONSERVER_TRACE_REQUEST_START ||
683646 activity == TRITONSERVER_TRACE_COMPUTE_START) {
684- StartSpan (span_key, trace, activity, timestamp_ns, id );
647+ StartSpan (trace, activity, timestamp_ns, trace_id );
685648 }
686649
687650 AddEvent (
688- span_key, TRITONSERVER_InferenceTraceActivityString (activity),
689- timestamp_ns );
651+ TRITONSERVER_InferenceTraceActivityString (activity), timestamp_ns ,
652+ trace_id );
690653
691654 if (activity == TRITONSERVER_TRACE_REQUEST_END ||
692655 activity == TRITONSERVER_TRACE_COMPUTE_END) {
693- EndSpan (span_key, timestamp_ns );
656+ EndSpan (timestamp_ns, trace_id );
694657 }
695658}
696659
697660void
698661TraceManager::Trace::AddEvent (
699- std::string span_key, std::string event, uint64_t timestamp)
662+ const std::string& event, uint64_t timestamp, uint64_t trace_id )
700663{
701- if (otel_context_.HasKey (span_key)) {
702- auto span = opentelemetry::nostd::get<
703- opentelemetry::nostd::shared_ptr<otel_trace_api::Span>>(
704- otel_context_.GetValue (span_key));
705- span->AddEvent (event, time_offset_ + std::chrono::nanoseconds{timestamp});
664+ if (span_stacks_.find (trace_id) != span_stacks_.end () &&
665+ !span_stacks_[trace_id]->empty ()) {
666+ span_stacks_[trace_id]->top ()->AddEvent (
667+ event, time_offset_ + std::chrono::nanoseconds{timestamp});
706668 }
707669}
708670
0 commit comments