elastic · xrmx · Dec 9, 2024 · Dec 6, 2024
@@ -176,7 +176,9 @@ def _chat_completion_wrapper(self, wrapped, instance, args, kwargs):
             logger.debug(f"openai.resources.chat.completions.Completions.create result: {result}")
 
             if span.is_recording():
-                _set_span_attributes_from_response(span, result.id, result.model, result.choices, result.usage)
+                _set_span_attributes_from_response(
+                    span, result.id, result.model, result.choices, result.usage, getattr(result, "service_tier", None)
+                )
 
             _record_token_usage_metrics(self.token_usage_metric, span, result.usage)
             _record_operation_duration_metric(self.operation_duration_metric, span, start_time)
@@ -231,7 +233,9 @@ async def _async_chat_completion_wrapper(self, wrapped, instance, args, kwargs):
             logger.debug(f"openai.resources.chat.completions.AsyncCompletions.create result: {result}")
 
             if span.is_recording():
-                _set_span_attributes_from_response(span, result.id, result.model, result.choices, result.usage)
+                _set_span_attributes_from_response(
+                    span, result.id, result.model, result.choices, result.usage, getattr(result, "service_tier", None)
+                )
 
             _record_token_usage_metrics(self.token_usage_metric, span, result.usage)
             _record_operation_duration_metric(self.operation_duration_metric, span, start_time)

@@ -16,10 +16,14 @@
 
 from collections.abc import Iterable, Mapping
 from timeit import default_timer
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, Optional
 
 from opentelemetry._events import Event, EventLogger
 from opentelemetry.semconv._incubating.attributes.gen_ai_attributes import (
+    GEN_AI_OPENAI_REQUEST_RESPONSE_FORMAT,
+    GEN_AI_OPENAI_REQUEST_SEED,
+    GEN_AI_OPENAI_REQUEST_SERVICE_TIER,
+    GEN_AI_OPENAI_RESPONSE_SERVICE_TIER,
     GEN_AI_OPERATION_NAME,
     GEN_AI_REQUEST_FREQUENCY_PENALTY,
     GEN_AI_REQUEST_MAX_TOKENS,
@@ -65,7 +69,12 @@
 
 
 def _set_span_attributes_from_response(
-    span: Span, response_id: str, model: str, choices, usage: CompletionUsage
+    span: Span,
+    response_id: str,
+    model: str,
+    choices,
+    usage: CompletionUsage,
+    service_tier: Optional[str],
 ) -> None:
     span.set_attribute(GEN_AI_RESPONSE_ID, response_id)
     span.set_attribute(GEN_AI_RESPONSE_MODEL, model)
@@ -76,6 +85,9 @@ def _set_span_attributes_from_response(
     if usage:
         span.set_attribute(GEN_AI_USAGE_INPUT_TOKENS, usage.prompt_tokens)
         span.set_attribute(GEN_AI_USAGE_OUTPUT_TOKENS, usage.completion_tokens)
+    # service_tier is present in the response only if it was requested
+    if service_tier:
+        span.set_attribute(GEN_AI_OPENAI_RESPONSE_SERVICE_TIER, service_tier)
 
 
 def _set_embeddings_span_attributes_from_response(span: Span, model: str, usage: CompletionUsage) -> None:
@@ -126,6 +138,17 @@ def _get_span_attributes_from_wrapper(instance, kwargs) -> Attributes:
         if isinstance(stop_sequences, str):
             stop_sequences = [stop_sequences]
         span_attributes[GEN_AI_REQUEST_STOP_SEQUENCES] = stop_sequences
+    if (seed := kwargs.get("seed")) is not None:
+        span_attributes[GEN_AI_OPENAI_REQUEST_SEED] = seed
+    if (service_tier := kwargs.get("service_tier")) is not None:
+        span_attributes[GEN_AI_OPENAI_REQUEST_SERVICE_TIER] = service_tier
+    if (response_format := kwargs.get("response_format")) is not None:
+        # response_format may be a string or a mapping with a string under its `type` key
+        if isinstance(response_format, Mapping):
+            if (response_format_type := response_format.get("type")) is not None:
+                span_attributes[GEN_AI_OPENAI_REQUEST_RESPONSE_FORMAT] = response_format_type
+        else:
+            span_attributes[GEN_AI_OPENAI_REQUEST_RESPONSE_FORMAT] = response_format
 
     return span_attributes
 

@@ -59,6 +59,7 @@ def __init__(
         self.model = None
         self.choices = []
         self.usage = None
+        self.service_tier = None
 
     def end(self, exc=None):
         # StopIteration is not an error, it signals that we have consumed all the stream
@@ -70,7 +71,9 @@ def end(self, exc=None):
             return
 
         if self.span.is_recording():
-            _set_span_attributes_from_response(self.span, self.response_id, self.model, self.choices, self.usage)
+            _set_span_attributes_from_response(
+                self.span, self.response_id, self.model, self.choices, self.usage, self.service_tier
+            )
 
         _record_operation_duration_metric(self.operation_duration_metric, self.span, self.start_time)
         if self.usage:
@@ -92,6 +95,8 @@ def process_chunk(self, chunk):
         # with `include_usage` in `stream_options` we will get a last chunk without choices
         if chunk.choices:
             self.choices += chunk.choices
+        if hasattr(chunk, "service_tier"):
+            self.service_tier = chunk.service_tier
 
     def __enter__(self):
         return self

@@ -12,6 +12,11 @@ interactions:
         "frequency_penalty": 0,
         "max_tokens": 100,
         "presence_penalty": 0,
+        "response_format": {
+          "type": "text"
+        },
+        "seed": 100,
+        "service_tier": "default",
         "stop": "foo",
         "temperature": 1,
         "top_p": 1
@@ -28,29 +33,29 @@ interactions:
       connection:
       - keep-alive
       content-length:
-      - '244'
+      - '321'
       content-type:
       - application/json
       host:
       - test.openai.azure.com
       user-agent:
-      - AzureOpenAI/Python 1.54.3
+      - AzureOpenAI/Python 1.54.5
       x-stainless-arch:
-      - arm64
+      - x64
       x-stainless-async:
       - 'false'
       x-stainless-lang:
       - python
       x-stainless-os:
-      - MacOS
+      - Linux
       x-stainless-package-version:
-      - 1.54.3
+      - 1.54.5
       x-stainless-retry-count:
       - '0'
       x-stainless-runtime:
       - CPython
       x-stainless-runtime-version:
-      - 3.12.6
+      - 3.10.12
     method: POST
     uri: https://test.openai.azure.com/openai/deployments/test-azure-deployment/chat/completions?api-version=2024-08-01-preview
   response:
@@ -64,6 +69,14 @@ interactions:
                   "filtered": false,
                   "severity": "safe"
                 },
+                "protected_material_code": {
+                  "filtered": false,
+                  "detected": false
+                },
+                "protected_material_text": {
+                  "filtered": false,
+                  "detected": false
+                },
                 "self_harm": {
                   "filtered": false,
                   "severity": "safe"
@@ -81,14 +94,14 @@ interactions:
               "index": 0,
               "logprobs": null,
               "message": {
-                "content": "Atlantic Ocean",
+                "content": "South Atlantic Ocean.",
                 "role": "assistant"
               }
             }
           ],
-          "created": 1731466203,
-          "id": "chatcmpl-ASxkBZGOa53uXX1Ciygl77IrF8PbB",
-          "model": "gpt-4-32k",
+          "created": 1733409253,
+          "id": "chatcmpl-Ab7DhFk7vSvmMW4ICIZh0gkvTZn7G",
+          "model": "gpt-4o-mini",
           "object": "chat.completion",
           "prompt_filter_results": [
             {
@@ -98,6 +111,10 @@ interactions:
                   "filtered": false,
                   "severity": "safe"
                 },
+                "jailbreak": {
+                  "filtered": false,
+                  "detected": false
+                },
                 "self_harm": {
                   "filtered": false,
                   "severity": "safe"
@@ -113,48 +130,46 @@ interactions:
               }
             }
           ],
-          "system_fingerprint": null,
+          "system_fingerprint": "fp_04751d0b65",
           "usage": {
-            "completion_tokens": 2,
+            "completion_tokens": 4,
             "prompt_tokens": 24,
-            "total_tokens": 26
+            "total_tokens": 28
           }
         }
     headers:
-      Cache-Control:
-      - no-cache, must-revalidate
       Content-Length:
-      - '805'
+      - '997'
       Content-Type:
       - application/json
       Date:
-      - Wed, 13 Nov 2024 02:50:02 GMT
+      - Thu, 05 Dec 2024 14:34:13 GMT
       Set-Cookie: test_set_cookie
       Strict-Transport-Security:
       - max-age=31536000; includeSubDomains; preload
-      access-control-allow-origin:
-      - '*'
       apim-request-id:
-      - f0e5ae5b-b609-4908-bedb-533ec71e9bfa
+      - ad6ebb52-6f0c-427c-b4cd-a186597cff93
       azureml-model-session:
-      - d156-20241010120317
+      - d029-20241115170135
       openai-organization: test_openai_org_id
       x-accel-buffering:
       - 'no'
       x-content-type-options:
       - nosniff
+      x-envoy-upstream-service-time:
+      - '180'
       x-ms-client-request-id:
-      - f0e5ae5b-b609-4908-bedb-533ec71e9bfa
+      - ad6ebb52-6f0c-427c-b4cd-a186597cff93
       x-ms-rai-invoked:
       - 'true'
       x-ms-region:
-      - Switzerland North
+      - East US
       x-ratelimit-remaining-requests:
-      - '78'
+      - '909'
       x-ratelimit-remaining-tokens:
-      - '79884'
+      - '90883'
       x-request-id:
-      - 3a1ee803-cce9-472f-ad04-2d0757009288
+      - 80dd2ee4-7ce2-4d04-a114-efb137a58ed4
     status:
       code: 200
       message: OK

@@ -12,6 +12,11 @@ interactions:
         "frequency_penalty": 0,
         "max_tokens": 100,
         "presence_penalty": 0,
+        "response_format": {
+          "type": "text"
+        },
+        "seed": 100,
+        "service_tier": "default",
         "stop": "foo",
         "temperature": 1,
         "top_p": 1
@@ -26,65 +31,65 @@ interactions:
       connection:
       - keep-alive
       content-length:
-      - '250'
+      - '327'
       content-type:
       - application/json
       host:
       - localhost:11434
       user-agent:
-      - OpenAI/Python 1.50.2
+      - OpenAI/Python 1.54.5
       x-stainless-arch:
-      - arm64
+      - x64
       x-stainless-async:
       - 'false'
       x-stainless-lang:
       - python
       x-stainless-os:
-      - MacOS
+      - Linux
       x-stainless-package-version:
-      - 1.50.2
+      - 1.54.5
       x-stainless-retry-count:
       - '0'
       x-stainless-runtime:
       - CPython
       x-stainless-runtime-version:
-      - 3.12.6
+      - 3.10.12
     method: POST
     uri: http://localhost:11434/v1/chat/completions
   response:
     body:
       string: |-
         {
-          "id": "chatcmpl-46",
+          "id": "chatcmpl-593",
           "object": "chat.completion",
-          "created": 1731311779,
+          "created": 1733409255,
           "model": "qwen2.5:0.5b",
           "system_fingerprint": "fp_ollama",
           "choices": [
             {
               "index": 0,
               "message": {
                 "role": "assistant",
-                "content": "The Falklands Islands are located in Atlantic Oceans."
+                "content": "Amalfis Sea"
               },
               "finish_reason": "stop"
             }
           ],
           "usage": {
             "prompt_tokens": 46,
-            "completion_tokens": 12,
-            "total_tokens": 58
+            "completion_tokens": 5,
+            "total_tokens": 51
           }
         }
     headers:
       Content-Length:
-      - '339'
+      - '297'
       Content-Type:
       - application/json
       Date:
-      - Mon, 11 Nov 2024 07:56:19 GMT
+      - Thu, 05 Dec 2024 14:34:15 GMT
       Set-Cookie: test_set_cookie
-      openai-organization: test_openai_org_key
+      openai-organization: test_openai_org_id
     status:
       code: 200
       message: OK