From ef911fe58678918a66d88096a1a618ee0c59f75b Mon Sep 17 00:00:00 2001 From: Riccardo Magliocchetti Date: Thu, 14 Nov 2024 15:31:52 +0100 Subject: [PATCH 1/4] elastic-opentelemetry-instrumentation-openai: test against baseline 1.26.0 is required for having usage data with streams but with some small changes we can keep the baseline as old as 1.2.0. --- .github/workflows/ci.yml | 19 ++++++++++++++----- .../pyproject.toml | 3 ++- .../instrumentation/openai/package.py | 2 +- .../instrumentation/openai/wrappers.py | 4 +++- .../tests/test_chat_completions.py | 4 +++- 5 files changed, 23 insertions(+), 9 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 615ae3a..4ae2add 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -35,7 +35,7 @@ jobs: - run: pip install . working-directory: ${{ matrix.working-dir }} - test: + test-openai: runs-on: ubuntu-latest env: py38: "3.8" @@ -43,11 +43,15 @@ jobs: py310: "3.10" py311: "3.11" py312: "3.12" + # Baseline version of openai client we support + baseline: "1.2.0" + latest: "" + working_dir: "instrumentation/elastic-opentelemetry-instrumentation-openai" strategy: fail-fast: false matrix: - working-dir: ['instrumentation/elastic-opentelemetry-instrumentation-openai'] python-version: [py38, py39, py310, py311, py312] + openai-version: [baseline, latest] steps: - uses: actions/checkout@v4 - name: Set up Python ${{ env[matrix.python-version] }} @@ -57,9 +61,14 @@ jobs: architecture: "x64" - if: ${{ env[matrix.python-version] == '3.8' || env[matrix.python-version] == '3.9' }} run: pip install -r dev-requirements-3.9.txt - working-directory: ${{ matrix.working-dir }} + working-directory: ${{ env.working_dir }} - if: ${{ env[matrix.python-version] != '3.8' && env[matrix.python-version] != '3.9' }} run: pip install -r dev-requirements.txt - working-directory: ${{ matrix.working-dir }} + working-directory: ${{ env.working_dir }} + - if: ${{ env[matrix.openai-version] }} + name: update openai to required version if not latest + run: + pip install openai==${{ env[matrix.openai-version] }} + working-directory: ${{ env.working_dir }} - run: pytest - working-directory: ${{ matrix.working-dir }} + working-directory: ${{ env.working_dir }} diff --git a/instrumentation/elastic-opentelemetry-instrumentation-openai/pyproject.toml b/instrumentation/elastic-opentelemetry-instrumentation-openai/pyproject.toml index 367ad86..c2cc468 100644 --- a/instrumentation/elastic-opentelemetry-instrumentation-openai/pyproject.toml +++ b/instrumentation/elastic-opentelemetry-instrumentation-openai/pyproject.toml @@ -29,6 +29,7 @@ dependencies = [ "opentelemetry-api ~= 1.28.1", "opentelemetry-instrumentation ~= 0.49b1", "opentelemetry-semantic-conventions ~= 0.49b1", + "wrapt >= 1.0.0, < 2.0.0", ] [project.readme] @@ -42,7 +43,7 @@ Homepage = "https://github.com/elastic/elastic-otel-python-instrumentations" [project.optional-dependencies] dev = ["pytest", "pip-tools", "openai", "numpy", "opentelemetry-test-utils", "vcrpy", "pytest-asyncio", "pytest-vcr"] instruments = [ - "openai >= 1.0.0", + "openai >= 1.2.0", ] [project.entry-points.opentelemetry_instrumentor] diff --git a/instrumentation/elastic-opentelemetry-instrumentation-openai/src/opentelemetry/instrumentation/openai/package.py b/instrumentation/elastic-opentelemetry-instrumentation-openai/src/opentelemetry/instrumentation/openai/package.py index d034f6e..be9cf1c 100644 --- 
a/instrumentation/elastic-opentelemetry-instrumentation-openai/src/opentelemetry/instrumentation/openai/package.py +++ b/instrumentation/elastic-opentelemetry-instrumentation-openai/src/opentelemetry/instrumentation/openai/package.py @@ -14,4 +14,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -_instruments = ("openai >= 1.0.0",) +_instruments = ("openai >= 1.2.0",) diff --git a/instrumentation/elastic-opentelemetry-instrumentation-openai/src/opentelemetry/instrumentation/openai/wrappers.py b/instrumentation/elastic-opentelemetry-instrumentation-openai/src/opentelemetry/instrumentation/openai/wrappers.py index d7a2acc..cf4cfa9 100644 --- a/instrumentation/elastic-opentelemetry-instrumentation-openai/src/opentelemetry/instrumentation/openai/wrappers.py +++ b/instrumentation/elastic-opentelemetry-instrumentation-openai/src/opentelemetry/instrumentation/openai/wrappers.py @@ -104,7 +104,9 @@ def end(self, exc=None): def process_chunk(self, chunk): self.response_id = chunk.id self.model = chunk.model - self.usage = chunk.usage + # usage with streaming is available since 1.26.0 + if hasattr(chunk, "usage"): + self.usage = chunk.usage # with `include_usage` in `stream_options` we will get a last chunk without choices if chunk.choices: self.choices += chunk.choices diff --git a/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/test_chat_completions.py b/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/test_chat_completions.py index 2b0c344..50a4954 100644 --- a/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/test_chat_completions.py +++ b/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/test_chat_completions.py @@ -48,6 +48,8 @@ ) from .utils import get_sorted_metrics, logrecords_from_logs +OPENAI_VERSION = tuple([int(x) for x in openai.version.VERSION.split(".")]) + providers = ["openai_provider_chat_completions", "ollama_provider_chat_completions", "azure_provider_chat_completions"] @@ -1288,7 +1290,7 @@ def test_stream( ), ] - +@pytest.mark.skipif(OPENAI_VERSION < (1, 26, 0), reason="stream_options added in 1.26.0") @pytest.mark.vcr() @pytest.mark.parametrize( "provider_str,model,response_model,content,response_id,input_tokens,output_tokens,duration", From 45df683803d246a0a9d7f2c128c5078ea83fe39c Mon Sep 17 00:00:00 2001 From: Riccardo Magliocchetti Date: Thu, 14 Nov 2024 15:51:48 +0100 Subject: [PATCH 2/4] elastic-opentelemetry-instrumentation-openai: add openai integration tests This adds a bunch of integration tests hitting OpenAI. This requires setting an `OPENAI_API_KEY` variable in GitHub secrets. The tests themselves are copies of the recorded ones, asserting values from the response data.
Integration tests are not run by default and can be run with `pytest --integration-tests`. --- .github/workflows/ci.yml | 6 + .../README.md | 6 + .../tests/conftest.py | 23 + .../tests/test_chat_completions.py | 418 +++++++++++++++++- .../tests/test_embeddings.py | 95 +++- .../tests/utils.py | 20 +- 6 files changed, 562 insertions(+), 6 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4ae2add..3909c62 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -72,3 +72,9 @@ jobs: working-directory: ${{ env.working_dir }} - run: pytest working-directory: ${{ env.working_dir }} + - if: ${{ env[matrix.python-version] == '3.12' && !env[matrix.openai-version] }} + # Only run on latest python and openai client version because we are calling openai + run: pytest --integration-tests + working-directory: ${{ env.working_dir }} + env: + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} diff --git a/instrumentation/elastic-opentelemetry-instrumentation-openai/README.md b/instrumentation/elastic-opentelemetry-instrumentation-openai/README.md index ce8d9b9..eb6832d 100644 --- a/instrumentation/elastic-opentelemetry-instrumentation-openai/README.md +++ b/instrumentation/elastic-opentelemetry-instrumentation-openai/README.md @@ -71,6 +71,12 @@ pip install -r dev-requirements.txt pytest ``` +To run integration tests doing real requests you need a valid OpenAI API key: + +``` +OPENAI_API_KEY=<your-api-key> pytest --integration-tests +``` + ## Refreshing HTTP payloads We use [VCR.py](https://vcrpy.readthedocs.io/en/latest/) to automatically record HTTP responses from diff --git a/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/conftest.py b/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/conftest.py index 9e42125..e3b2437 100644 --- a/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/conftest.py +++ b/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/conftest.py @@ -498,3 +498,26 @@ def assert_token_usage_metric( ), ], ) + + +def pytest_addoption(parser): + parser.addoption( + "--integration-tests", + action="store_true", + default=False, + help="run integration tests doing real requests", + ) + + +def pytest_configure(config): + config.addinivalue_line("markers", "integration: mark integration tests") + + +def pytest_collection_modifyitems(config, items): + run_integration_tests = bool(config.getoption("integration_tests")) + reason = "running integration tests only" if run_integration_tests else "skipping integration tests" + skip_mark = pytest.mark.skip(reason=reason) + for item in items: + test_is_integration = "integration" in item.keywords + if run_integration_tests != test_is_integration: + item.add_marker(skip_mark) diff --git a/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/test_chat_completions.py b/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/test_chat_completions.py index 50a4954..26bd9a3 100644 --- a/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/test_chat_completions.py +++ b/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/test_chat_completions.py @@ -46,7 +46,7 @@ assert_operation_duration_metric, assert_token_usage_metric, ) -from .utils import get_sorted_metrics, logrecords_from_logs +from .utils import MOCK_POSITIVE_FLOAT, get_sorted_metrics, logrecords_from_logs OPENAI_VERSION = tuple([int(x) for x in openai.version.VERSION.split(".")]) providers = ["openai_provider_chat_completions", "ollama_provider_chat_completions", "azure_provider_chat_completions"] @@ -874,6 +874,146 @@ def test_tools_with_capture_content_log_events( )
+@pytest.mark.integration +@pytest.mark.parametrize( + "provider_str,model,response_model", + [ + ( + "openai_provider_chat_completions", + "gpt-4o-mini", + "gpt-4o-mini-2024-07-18", + ), + ], +) +def test_tools_with_capture_content_log_events_integration( + provider_str, + model, + response_model, + trace_exporter, + logs_exporter, + metrics_reader, + request, +): + provider = request.getfixturevalue(provider_str) + client = provider.get_client() + + # Redo the instrumentation dance to be affected by the environment variable + OpenAIInstrumentor().uninstrument() + with mock.patch.dict( + "os.environ", {"ELASTIC_OTEL_GENAI_CAPTURE_CONTENT": "true", "ELASTIC_OTEL_GENAI_EVENTS": "log"} + ): + OpenAIInstrumentor().instrument() + + tools = [ + { + "type": "function", + "function": { + "name": "get_delivery_date", + "description": "Get the delivery date for a customer's order. Call this whenever you need to know the delivery date, for example when a customer asks 'Where is my package'", + "parameters": { + "type": "object", + "properties": { + "order_id": { + "type": "string", + "description": "The customer's order ID.", + }, + }, + "required": ["order_id"], + "additionalProperties": False, + }, + }, + } + ] + + messages = [ + { + "role": "system", + "content": "You are a helpful customer support assistant. Use the supplied tools to assist the user.", + }, + {"role": "user", "content": "Hi, can you tell me the delivery date for my order?"}, + { + "role": "assistant", + "content": "Hi there! I can help with that. Can you please provide your order ID?", + }, + {"role": "user", "content": "i think it is order_12345"}, + ] + + response = client.chat.completions.create(model=model, messages=messages, tools=tools) + tool_call = response.choices[0].message.tool_calls[0] + assert tool_call.function.name == "get_delivery_date" + assert json.loads(tool_call.function.arguments) == {"order_id": "order_12345"} + + spans = trace_exporter.get_finished_spans() + assert len(spans) == 1 + + span = spans[0] + assert span.name == f"chat {model}" + assert span.kind == SpanKind.CLIENT + assert span.status.status_code == StatusCode.UNSET + + assert dict(span.attributes) == { + GEN_AI_OPERATION_NAME: "chat", + GEN_AI_REQUEST_MODEL: model, + GEN_AI_SYSTEM: "openai", + GEN_AI_RESPONSE_ID: response.id, + GEN_AI_RESPONSE_MODEL: response_model, + GEN_AI_RESPONSE_FINISH_REASONS: ("tool_calls",), + GEN_AI_USAGE_INPUT_TOKENS: response.usage.prompt_tokens, + GEN_AI_USAGE_OUTPUT_TOKENS: response.usage.completion_tokens, + SERVER_ADDRESS: provider.server_address, + SERVER_PORT: provider.server_port, + } + + logs = logs_exporter.get_finished_logs() + assert len(logs) == 5 + log_records = logrecords_from_logs(logs) + system_message, user_message, assistant_message, second_user_message, choice = log_records + assert system_message.attributes == {"gen_ai.system": "openai", "event.name": "gen_ai.system.message"} + assert system_message.body == { + "content": "You are a helpful customer support assistant. Use the supplied tools to assist the user." + } + assert user_message.attributes == {"gen_ai.system": "openai", "event.name": "gen_ai.user.message"} + assert user_message.body == {"content": "Hi, can you tell me the delivery date for my order?"} + assert assistant_message.attributes == {"gen_ai.system": "openai", "event.name": "gen_ai.assistant.message"} + assert assistant_message.body == { + "content": "Hi there! I can help with that. Can you please provide your order ID?" 
+ } + assert second_user_message.attributes == {"gen_ai.system": "openai", "event.name": "gen_ai.user.message"} + assert second_user_message.body == {"content": "i think it is order_12345"} + assert choice.attributes == {"gen_ai.system": "openai", "event.name": "gen_ai.choice"} + + expected_body = { + "finish_reason": "tool_calls", + "index": 0, + "message": { + "tool_calls": [ + { + "function": {"arguments": '{"order_id":"order_12345"}', "name": "get_delivery_date"}, + "id": tool_call.id, + "type": "function", + }, + ], + }, + } + assert dict(choice.body) == expected_body + + operation_duration_metric, token_usage_metric = get_sorted_metrics(metrics_reader) + attributes = { + GEN_AI_REQUEST_MODEL: model, + GEN_AI_RESPONSE_MODEL: response_model, + } + assert_operation_duration_metric( + provider, operation_duration_metric, attributes=attributes, min_data_point=MOCK_POSITIVE_FLOAT + ) + assert_token_usage_metric( + provider, + token_usage_metric, + attributes=attributes, + input_data_point=response.usage.prompt_tokens, + output_data_point=response.usage.completion_tokens, + ) + + test_connection_error_test_data = [ ( "openai_provider_chat_completions", @@ -1059,6 +1199,92 @@ def test_basic_with_capture_content( ) +@pytest.mark.integration +@pytest.mark.parametrize( + "provider_str,model,response_model", + [ + ( + "openai_provider_chat_completions", + "gpt-4o-mini", + "gpt-4o-mini-2024-07-18", + ) + ], +) +def test_basic_with_capture_content_integration( + provider_str, + model, + response_model, + trace_exporter, + metrics_reader, + request, +): + provider = request.getfixturevalue(provider_str) + + # Redo the instrumentation dance to be affected by the environment variable + OpenAIInstrumentor().uninstrument() + with mock.patch.dict("os.environ", {"ELASTIC_OTEL_GENAI_CAPTURE_CONTENT": "true"}): + OpenAIInstrumentor().instrument() + + client = provider.get_client() + + messages = [ + { + "role": "user", + "content": "Answer in up to 3 words: Which ocean contains the falkland islands?", + } + ] + + response = client.chat.completions.create(model=model, messages=messages) + content = response.choices[0].message.content + assert content + + spans = trace_exporter.get_finished_spans() + assert len(spans) == 1 + + span = spans[0] + assert span.name == f"chat {model}" + assert span.kind == SpanKind.CLIENT + assert span.status.status_code == StatusCode.UNSET + + assert dict(span.attributes) == { + GEN_AI_OPERATION_NAME: "chat", + GEN_AI_REQUEST_MODEL: model, + GEN_AI_SYSTEM: "openai", + GEN_AI_RESPONSE_ID: response.id, + GEN_AI_RESPONSE_MODEL: response_model, + GEN_AI_RESPONSE_FINISH_REASONS: ("stop",), + GEN_AI_USAGE_INPUT_TOKENS: response.usage.prompt_tokens, + GEN_AI_USAGE_OUTPUT_TOKENS: response.usage.completion_tokens, + SERVER_ADDRESS: provider.server_address, + SERVER_PORT: provider.server_port, + } + + assert len(span.events) == 2 + prompt_event, completion_event = span.events + assert prompt_event.name == "gen_ai.content.prompt" + assert dict(prompt_event.attributes) == {"gen_ai.prompt": json.dumps(messages)} + assert completion_event.name == "gen_ai.content.completion" + assert dict(completion_event.attributes) == { + "gen_ai.completion": '[{"role": "assistant", "content": "' + content + '"}]' + } + + operation_duration_metric, token_usage_metric = get_sorted_metrics(metrics_reader) + attributes = { + GEN_AI_REQUEST_MODEL: model, + GEN_AI_RESPONSE_MODEL: response_model, + } + assert_operation_duration_metric( + provider, operation_duration_metric, attributes=attributes, 
min_data_point=MOCK_POSITIVE_FLOAT + ) + assert_token_usage_metric( + provider, + token_usage_metric, + attributes=attributes, + input_data_point=response.usage.prompt_tokens, + output_data_point=response.usage.completion_tokens, + ) + + test_basic_with_capture_content_log_events_test_data = [ ( "openai_provider_chat_completions", @@ -1290,6 +1516,7 @@ def test_stream( ), ] + @pytest.mark.skipif(OPENAI_VERSION < (1, 26, 0), reason="stream_options added in 1.26.0") @pytest.mark.vcr() @pytest.mark.parametrize( @@ -1365,6 +1592,98 @@ def test_stream_with_include_usage_option( ) +@pytest.mark.skipif(OPENAI_VERSION < (1, 26, 0), reason="stream_options added in 1.26.0") +@pytest.mark.integration +@pytest.mark.parametrize( + "provider_str,model,response_model", + [ + ( + "openai_provider_chat_completions", + "gpt-4o-mini", + "gpt-4o-mini-2024-07-18", + ) + ], +) +def test_stream_with_include_usage_option_and_capture_content_integration( + provider_str, + model, + response_model, + trace_exporter, + metrics_reader, + request, +): + provider = request.getfixturevalue(provider_str) + + # Redo the instrumentation dance to be affected by the environment variable + OpenAIInstrumentor().uninstrument() + with mock.patch.dict("os.environ", {"ELASTIC_OTEL_GENAI_CAPTURE_CONTENT": "true"}): + OpenAIInstrumentor().instrument() + + client = provider.get_client() + + messages = [ + { + "role": "user", + "content": "Answer in up to 3 words: Which ocean contains the falkland islands?", + } + ] + + response = client.chat.completions.create( + model=model, messages=messages, stream=True, stream_options={"include_usage": True} + ) + chunks = [chunk for chunk in response] + usage = chunks[-1].usage + + chunks_content = [chunk.choices[0].delta.content or "" for chunk in chunks if chunk.choices] + content = "".join(chunks_content) + assert content + + spans = trace_exporter.get_finished_spans() + assert len(spans) == 1 + + span = spans[0] + assert span.name == f"chat {model}" + assert span.kind == SpanKind.CLIENT + assert span.status.status_code == StatusCode.UNSET + + assert dict(span.attributes) == { + GEN_AI_OPERATION_NAME: "chat", + GEN_AI_REQUEST_MODEL: model, + GEN_AI_SYSTEM: "openai", + GEN_AI_RESPONSE_ID: chunks[0].id, + GEN_AI_RESPONSE_MODEL: response_model, + GEN_AI_RESPONSE_FINISH_REASONS: ("stop",), + GEN_AI_USAGE_INPUT_TOKENS: usage.prompt_tokens, + GEN_AI_USAGE_OUTPUT_TOKENS: usage.completion_tokens, + SERVER_ADDRESS: provider.server_address, + SERVER_PORT: provider.server_port, + } + assert len(span.events) == 2 + prompt_event, completion_event = span.events + assert prompt_event.name == "gen_ai.content.prompt" + assert dict(prompt_event.attributes) == {"gen_ai.prompt": json.dumps(messages)} + assert completion_event.name == "gen_ai.content.completion" + assert dict(completion_event.attributes) == { + "gen_ai.completion": '[{"role": "assistant", "content": "' + content + '"}]' + } + + operation_duration_metric, token_usage_metric = get_sorted_metrics(metrics_reader) + attributes = { + GEN_AI_REQUEST_MODEL: model, + GEN_AI_RESPONSE_MODEL: response_model, + } + assert_operation_duration_metric( + provider, operation_duration_metric, attributes=attributes, min_data_point=MOCK_POSITIVE_FLOAT + ) + assert_token_usage_metric( + provider, + token_usage_metric, + attributes=attributes, + input_data_point=usage.prompt_tokens, + output_data_point=usage.completion_tokens, + ) + + test_stream_with_tools_and_capture_content_test_data = [ ( "openai_provider_chat_completions", @@ -2553,6 +2872,103 @@ async def 
test_async_basic_with_capture_content_log_events( ) +@pytest.mark.integration +@pytest.mark.asyncio +@pytest.mark.parametrize( + "provider_str,model,response_model", + [ + ( + "openai_provider_chat_completions", + "gpt-4o-mini", + "gpt-4o-mini-2024-07-18", + ), + ], +) +async def test_async_basic_with_capture_content_log_events_integration( + provider_str, + model, + response_model, + trace_exporter, + logs_exporter, + metrics_reader, + request, +): + provider = request.getfixturevalue(provider_str) + client = provider.get_async_client() + + # Redo the instrumentation dance to be affected by the environment variable + OpenAIInstrumentor().uninstrument() + with mock.patch.dict( + "os.environ", {"ELASTIC_OTEL_GENAI_CAPTURE_CONTENT": "true", "ELASTIC_OTEL_GENAI_EVENTS": "log"} + ): + OpenAIInstrumentor().instrument() + + messages = [ + { + "role": "user", + "content": "Answer in up to 3 words: Which ocean contains the falkland islands?", + } + ] + + response = await client.chat.completions.create(model=model, messages=messages) + content = response.choices[0].message.content + assert content + + spans = trace_exporter.get_finished_spans() + assert len(spans) == 1 + + span = spans[0] + assert span.name == f"chat {model}" + assert span.kind == SpanKind.CLIENT + assert span.status.status_code == StatusCode.UNSET + + assert dict(span.attributes) == { + GEN_AI_OPERATION_NAME: "chat", + GEN_AI_REQUEST_MODEL: model, + GEN_AI_SYSTEM: "openai", + GEN_AI_RESPONSE_ID: response.id, + GEN_AI_RESPONSE_MODEL: response_model, + GEN_AI_RESPONSE_FINISH_REASONS: ("stop",), + GEN_AI_USAGE_INPUT_TOKENS: response.usage.prompt_tokens, + GEN_AI_USAGE_OUTPUT_TOKENS: response.usage.completion_tokens, + SERVER_ADDRESS: provider.server_address, + SERVER_PORT: provider.server_port, + } + + logs = logs_exporter.get_finished_logs() + assert len(logs) == 2 + log_records = logrecords_from_logs(logs) + user_message, choice = log_records + assert user_message.attributes == {"gen_ai.system": "openai", "event.name": "gen_ai.user.message"} + assert user_message.body == {"content": "Answer in up to 3 words: Which ocean contains the falkland islands?"} + assert choice.attributes == {"gen_ai.system": "openai", "event.name": "gen_ai.choice"} + + expected_body = { + "finish_reason": "stop", + "index": 0, + "message": { + "content": content, + }, + } + assert dict(choice.body) == expected_body + + operation_duration_metric, token_usage_metric = get_sorted_metrics(metrics_reader) + attributes = { + GEN_AI_REQUEST_MODEL: model, + GEN_AI_RESPONSE_MODEL: response_model, + } + assert_operation_duration_metric( + provider, operation_duration_metric, attributes=attributes, min_data_point=MOCK_POSITIVE_FLOAT + ) + assert_token_usage_metric( + provider, + token_usage_metric, + attributes=attributes, + input_data_point=response.usage.prompt_tokens, + output_data_point=response.usage.completion_tokens, + ) + + test_async_stream_test_data = [ ( "openai_provider_chat_completions", diff --git a/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/test_embeddings.py b/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/test_embeddings.py index b86183a..ade2706 100644 --- a/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/test_embeddings.py +++ b/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/test_embeddings.py @@ -35,7 +35,7 @@ assert_operation_duration_metric, assert_token_usage_input_metric, ) -from .utils import get_sorted_metrics +from .utils import MOCK_POSITIVE_FLOAT, 
get_sorted_metrics test_basic_test_data = [ @@ -69,7 +69,7 @@ def test_basic(provider_str, model, input_tokens, duration, trace_exporter, metr GEN_AI_REQUEST_MODEL: model, GEN_AI_SYSTEM: "openai", GEN_AI_RESPONSE_MODEL: model, - GEN_AI_USAGE_INPUT_TOKENS: 4, + GEN_AI_USAGE_INPUT_TOKENS: input_tokens, SERVER_ADDRESS: provider.server_address, SERVER_PORT: provider.server_port, } @@ -118,7 +118,7 @@ def test_all_the_client_options(provider_str, model, input_tokens, duration, tra GEN_AI_SYSTEM: "openai", GEN_AI_RESPONSE_MODEL: model, GEN_AI_REQUEST_ENCODING_FORMAT: "float", - GEN_AI_USAGE_INPUT_TOKENS: 4, + GEN_AI_USAGE_INPUT_TOKENS: input_tokens, SERVER_ADDRESS: provider.server_address, SERVER_PORT: provider.server_port, } @@ -135,6 +135,50 @@ def test_all_the_client_options(provider_str, model, input_tokens, duration, tra assert_token_usage_input_metric(provider, token_usage_metric, attributes=attributes, input_data_point=4) +@pytest.mark.integration +@pytest.mark.parametrize("provider_str,model", [("openai_provider_embeddings", "text-embedding-3-small")]) +def test_all_the_client_options_integration(provider_str, model, trace_exporter, metrics_reader, request): + provider = request.getfixturevalue(provider_str) + client = provider.get_client() + + text = "South Atlantic Ocean." + response = client.embeddings.create(model=model, input=[text], encoding_format="float") + + assert len(response.data) == 1 + + spans = trace_exporter.get_finished_spans() + assert len(spans) == 1 + + span = spans[0] + assert span.name == f"embeddings {model}" + assert span.kind == SpanKind.CLIENT + assert span.status.status_code == StatusCode.UNSET + + assert dict(span.attributes) == { + GEN_AI_OPERATION_NAME: provider.operation_name, + GEN_AI_REQUEST_MODEL: model, + GEN_AI_SYSTEM: "openai", + GEN_AI_RESPONSE_MODEL: model, + GEN_AI_REQUEST_ENCODING_FORMAT: "float", + GEN_AI_USAGE_INPUT_TOKENS: response.usage.prompt_tokens, + SERVER_ADDRESS: provider.server_address, + SERVER_PORT: provider.server_port, + } + assert span.events == () + + operation_duration_metric, token_usage_metric = get_sorted_metrics(metrics_reader) + attributes = { + GEN_AI_REQUEST_MODEL: model, + GEN_AI_RESPONSE_MODEL: model, + } + assert_operation_duration_metric( + provider, operation_duration_metric, attributes=attributes, min_data_point=MOCK_POSITIVE_FLOAT + ) + assert_token_usage_input_metric( + provider, token_usage_metric, attributes=attributes, input_data_point=response.usage.prompt_tokens + ) + + test_connection_error_data = [ ("openai_provider_embeddings", "text-embedding-3-small", 0.460242404602468), ("azure_provider_embeddings", "text-embedding-3-small", 0.4328950522467494), @@ -286,6 +330,51 @@ async def test_async_all_the_client_options( assert_token_usage_input_metric(provider, token_usage_metric, attributes=attributes, input_data_point=input_tokens) +@pytest.mark.integration +@pytest.mark.asyncio +@pytest.mark.parametrize("provider_str,model", [("openai_provider_embeddings", "text-embedding-3-small")]) +async def test_async_all_the_client_options_integration(provider_str, model, trace_exporter, metrics_reader, request): + provider = request.getfixturevalue(provider_str) + client = provider.get_async_client() + + text = "South Atlantic Ocean." 
+ response = await client.embeddings.create(model=model, input=[text], encoding_format="float") + + assert len(response.data) == 1 + + spans = trace_exporter.get_finished_spans() + assert len(spans) == 1 + + span = spans[0] + assert span.name == f"embeddings {model}" + assert span.kind == SpanKind.CLIENT + assert span.status.status_code == StatusCode.UNSET + + assert dict(span.attributes) == { + GEN_AI_OPERATION_NAME: provider.operation_name, + GEN_AI_REQUEST_MODEL: model, + GEN_AI_SYSTEM: "openai", + GEN_AI_RESPONSE_MODEL: model, + GEN_AI_REQUEST_ENCODING_FORMAT: "float", + GEN_AI_USAGE_INPUT_TOKENS: response.usage.prompt_tokens, + SERVER_ADDRESS: provider.server_address, + SERVER_PORT: provider.server_port, + } + assert span.events == () + + operation_duration_metric, token_usage_metric = get_sorted_metrics(metrics_reader) + attributes = { + GEN_AI_REQUEST_MODEL: model, + GEN_AI_RESPONSE_MODEL: model, + } + assert_operation_duration_metric( + provider, operation_duration_metric, attributes=attributes, min_data_point=MOCK_POSITIVE_FLOAT + ) + assert_token_usage_input_metric( + provider, token_usage_metric, attributes=attributes, input_data_point=response.usage.prompt_tokens + ) + + test_async_connection_error_test_data = [ ("openai_provider_embeddings", "text-embedding-3-small", 0.2263190783560276), ("azure_provider_embeddings", "text-embedding-3-small", 0.0036478489999751673), diff --git a/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/utils.py b/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/utils.py index 61bf75a..02cf46c 100644 --- a/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/utils.py +++ b/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/utils.py @@ -13,6 +13,7 @@ # limitations under the License. 
from typing import Mapping, Optional, Sequence +from unittest import mock from opentelemetry.sdk._logs._internal import LogData from opentelemetry.sdk.metrics._internal.point import Metric @@ -69,9 +70,16 @@ def is_data_points_equal( values_diff = None if isinstance(data_point, NumberDataPoint): - values_diff = abs(expected_data_point.value - data_point.value) + if data_point.value is mock.ANY or data_point.value is MOCK_POSITIVE_FLOAT: + values_diff = 0 + else: + values_diff = abs(expected_data_point.value - data_point.value) elif isinstance(data_point, HistogramDataPoint): - values_diff = abs(expected_data_point.sum - data_point.sum) + if expected_data_point.sum is mock.ANY or expected_data_point.sum is MOCK_POSITIVE_FLOAT: + values_diff = 0 + else: + values_diff = abs(expected_data_point.sum - data_point.sum) + if expected_data_point.count != data_point.count or ( est_value_delta == 0 and (expected_data_point.min != data_point.min or expected_data_point.max != data_point.max) @@ -123,3 +131,11 @@ def create_histogram_data_point(sum_data_point, count, max_data_point, min_data_ def logrecords_from_logs(logs: Sequence[LogData]) -> Sequence[Mapping[str, AttributeValue]]: return [log.log_record for log in logs] + + +class MockPositiveFloat: + def __eq__(self, other): + return isinstance(other, float) and other > 0.0 + + +MOCK_POSITIVE_FLOAT = MockPositiveFloat() From 210ace48ec3fc74dee4230dfc23f501e6a420085 Mon Sep 17 00:00:00 2001 From: Riccardo Magliocchetti Date: Thu, 14 Nov 2024 16:20:47 +0100 Subject: [PATCH 3/4] Please azure timings --- .../tests/test_embeddings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/test_embeddings.py b/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/test_embeddings.py index ade2706..de00565 100644 --- a/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/test_embeddings.py +++ b/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/test_embeddings.py @@ -377,7 +377,7 @@ async def test_async_all_the_client_options_integration(provider_str, model, tra test_async_connection_error_test_data = [ ("openai_provider_embeddings", "text-embedding-3-small", 0.2263190783560276), - ("azure_provider_embeddings", "text-embedding-3-small", 0.0036478489999751673), + ("azure_provider_embeddings", "text-embedding-3-small", 1.0062104639999916), ("ollama_provider_embeddings", "all-minilm:33m", 0.0030461717396974564), ] From 0daf9dc598ef8307b1ae4d9717ae990d571d3e00 Mon Sep 17 00:00:00 2001 From: Riccardo Magliocchetti Date: Thu, 14 Nov 2024 16:28:33 +0100 Subject: [PATCH 4/4] Please also ollama timings --- .../tests/test_embeddings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/test_embeddings.py b/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/test_embeddings.py index de00565..04d3d0b 100644 --- a/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/test_embeddings.py +++ b/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/test_embeddings.py @@ -378,7 +378,7 @@ async def test_async_all_the_client_options_integration(provider_str, model, tra test_async_connection_error_test_data = [ ("openai_provider_embeddings", "text-embedding-3-small", 0.2263190783560276), ("azure_provider_embeddings", "text-embedding-3-small", 1.0062104639999916), - ("ollama_provider_embeddings", "all-minilm:33m", 0.0030461717396974564),
+ ("ollama_provider_embeddings", "all-minilm:33m", 1.0148218229999770), ]