Tracing: extract context from incoming request (#77)

SilverSoldier · web-flow · commit 0a73cb7768f2 · 2024-04-10T01:45:48.000-07:00
#### Motivation

When receiving a request from an upstream component, extract the propagated
context to read the trace parent info and set it as the current span's parent

#### Modifications

1. Extract context from request
2. Set tracing service name through argument or env variable

Signed-off-by: Kavya Govindarajan <kavya.g@ibm.com>
Co-authored-by: Kavya Govindarajan <kavya.g@ibm.com>
diff --git a/launcher/src/main.rs b/launcher/src/main.rs
@@ -91,6 +91,8 @@ struct Args {
     default_include_stop_seqs: bool,
     #[clap(long, env)]
     otlp_endpoint: Option<String>,
+    #[clap(long, env)]
+    otlp_service_name: Option<String>,
 }
 
 fn main() -> ExitCode {
@@ -331,6 +333,11 @@ fn main() -> ExitCode {
     if let Some(otlp_endpoint) = args.otlp_endpoint {
         argv.push("--otlp-endpoint".to_string());
         argv.push(otlp_endpoint);
+
+        if let Some(otlp_service_name) = args.otlp_service_name {
+            argv.push("--otlp-service-name".to_string());
+            argv.push(otlp_service_name);
+        }
     }
 
     if args.output_special_tokens {
diff --git a/router/src/grpc_server.rs b/router/src/grpc_server.rs
@@ -28,6 +28,7 @@ use crate::{
     },
     server::ServerState,
     tokenizer::AsyncTokenizer,
+    tracing::ExtractTelemetryContext,
     validation::{RequestSize, ValidationError},
     GenerateParameters, GenerateRequest,
 };
@@ -119,6 +120,7 @@ impl GenerationService for GenerationServicer {
         request: Request<BatchedGenerationRequest>,
     ) -> Result<Response<BatchedGenerationResponse>, Status> {
         let start_time = Instant::now();
+        let request = request.extract_context();
         let br = request.into_inner();
         let batch_size = br.requests.len();
         let kind = if batch_size == 1 { "single" } else { "batch" };
@@ -251,6 +253,7 @@ impl GenerationService for GenerationServicer {
         request: Request<SingleGenerationRequest>,
     ) -> Result<Response<Self::GenerateStreamStream>, Status> {
         let start_time = Instant::now();
+        let request = request.extract_context();
         metrics::increment_counter!("tgi_request_count", "kind" => "stream");
         self.input_counter.increment(1);
         let permit = self
diff --git a/router/src/lib.rs b/router/src/lib.rs
@@ -10,6 +10,7 @@ mod queue;
 pub mod server;
 mod tokenizer;
 mod validation;
+mod tracing;
 
 use batcher::Batcher;
 use serde::{Deserialize, Serialize};
diff --git a/router/src/main.rs b/router/src/main.rs
@@ -61,6 +61,12 @@ struct Args {
     default_include_stop_seqs: bool,
     #[clap(long, env)]
     otlp_endpoint: Option<String>,
+    #[clap(
+        long,
+        env = "OTEL_SERVICE_NAME",
+        default_value = "text-generation-inference.router"
+    )]
+    otlp_service_name: String,
 }
 
 fn main() -> Result<(), std::io::Error> {
@@ -106,7 +112,7 @@ fn main() -> Result<(), std::io::Error> {
         .build()
         .unwrap()
         .block_on(async {
-            init_logging(args.otlp_endpoint, args.json_output);
+            init_logging(args.otlp_endpoint, args.json_output, args.otlp_service_name);
             // Instantiate sharded client from the master unix socket
             let mut sharded_client = ShardedClient::connect_uds(args.master_shard_uds_path)
                 .await
@@ -206,7 +212,7 @@ fn write_termination_log(msg: &str) -> Result<(), io::Error> {
     Ok(())
 }
 
-fn init_logging(otlp_endpoint: Option<String>, json_output: bool) {
+fn init_logging(otlp_endpoint: Option<String>, json_output: bool, otlp_service_name: String) {
     let mut layers = Vec::new();
 
     // STDOUT/STDERR layer
@@ -235,7 +241,7 @@ fn init_logging(otlp_endpoint: Option<String>, json_output: bool) {
                 trace::config()
                     .with_resource(Resource::new(vec![KeyValue::new(
                         "service.name",
-                        "text-generation-inference.router",
+                        otlp_service_name,
                     )]))
                     .with_sampler(Sampler::AlwaysOn),
             )
diff --git a/router/src/tracing.rs b/router/src/tracing.rs
@@ -0,0 +1,44 @@
+//! Inspired by: https://github.com/open-telemetry/opentelemetry-rust gRPC examples
+
+use opentelemetry::{global, propagation::Extractor};
+use tonic::Request;
+use tracing::Span;
+use tracing_opentelemetry::OpenTelemetrySpanExt;
+
+struct MetadataExtractor<'a>(&'a tonic::metadata::MetadataMap); // borrowed wrapper so the map can be read through `Extractor`
+
+impl<'a> Extractor for MetadataExtractor<'a> {
+    /// Look up `key` in the wrapped MetadataMap. Returns None when the key is absent or its value can't be converted to &str.
+    fn get(&self, key: &str) -> Option<&str> {
+        self.0.get(key).and_then(|metadata| metadata.to_str().ok())
+    }
+
+    /// Collect every key of the wrapped MetadataMap as &str, for both the Ascii and the Binary key variants.
+    fn keys(&self) -> Vec<&str> {
+        self.0
+            .keys()
+            .map(|key| match key {
+                tonic::metadata::KeyRef::Ascii(v) => v.as_str(),
+                tonic::metadata::KeyRef::Binary(v) => v.as_str(),
+            })
+            .collect::<Vec<_>>()
+    }
+}
+
+/// Extract the propagated context from `metadata` and set it as the parent of the current span.
+fn extract(metadata: &tonic::metadata::MetadataMap) {
+    let parent_cx =
+        global::get_text_map_propagator(|prop| prop.extract(&MetadataExtractor(metadata)));
+    Span::current().set_parent(parent_cx);
+}
+
+pub trait ExtractTelemetryContext {
+    fn extract_context(self) -> Self; // takes the value by move and hands back the same type, so calls can be chained
+}
+
+impl<T> ExtractTelemetryContext for Request<T> {
+    fn extract_context(self) -> Self {
+        extract(self.metadata()); // only the request's metadata is read; the current span gets its parent set
+        self // the request is returned as received
+    }
+}