From ef911fe58678918a66d88096a1a618ee0c59f75b Mon Sep 17 00:00:00 2001 From: Riccardo Magliocchetti Date: Thu, 14 Nov 2024 15:31:52 +0100 Subject: [PATCH 1/4] elastic-opentelemetry-instrumentation-openai: test against baseline 1.26.0 is required for having usage data with streams but with some small changes we can keep the baseline as old as 1.2.0. --- .github/workflows/ci.yml | 19 ++++++++++++++----- .../pyproject.toml | 3 ++- .../instrumentation/openai/package.py | 2 +- .../instrumentation/openai/wrappers.py | 4 +++- .../tests/test_chat_completions.py | 4 +++- 5 files changed, 23 insertions(+), 9 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 615ae3a..4ae2add 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -35,7 +35,7 @@ jobs: - run: pip install . working-directory: ${{ matrix.working-dir }} - test: + test-openai: runs-on: ubuntu-latest env: py38: "3.8" @@ -43,11 +43,15 @@ jobs: py310: "3.10" py311: "3.11" py312: "3.12" + # Baseline version of openai client we support + baseline: "1.2.0" + latest: "" + working_dir: "instrumentation/elastic-opentelemetry-instrumentation-openai" strategy: fail-fast: false matrix: - working-dir: ['instrumentation/elastic-opentelemetry-instrumentation-openai'] python-version: [py38, py39, py310, py311, py312] + openai-version: [baseline, latest] steps: - uses: actions/checkout@v4 - name: Set up Python ${{ env[matrix.python-version] }} @@ -57,9 +61,14 @@ jobs: architecture: "x64" - if: ${{ env[matrix.python-version] == '3.8' || env[matrix.python-version] == '3.9' }} run: pip install -r dev-requirements-3.9.txt - working-directory: ${{ matrix.working-dir }} + working-directory: ${{ env.working_dir }} - if: ${{ env[matrix.python-version] != '3.8' && env[matrix.python-version] != '3.9' }} run: pip install -r dev-requirements.txt - working-directory: ${{ matrix.working-dir }} + working-directory: ${{ env.working_dir }} + - if: ${{ env[matrix.openai-version] }} + name: update openai to required version if not latest + run: + pip install openai==${{ env[matrix.openai-version] }} + working-directory: ${{ env.working_dir }} - run: pytest - working-directory: ${{ matrix.working-dir }} + working-directory: ${{ env.working_dir }} diff --git a/instrumentation/elastic-opentelemetry-instrumentation-openai/pyproject.toml b/instrumentation/elastic-opentelemetry-instrumentation-openai/pyproject.toml index 367ad86..c2cc468 100644 --- a/instrumentation/elastic-opentelemetry-instrumentation-openai/pyproject.toml +++ b/instrumentation/elastic-opentelemetry-instrumentation-openai/pyproject.toml @@ -29,6 +29,7 @@ dependencies = [ "opentelemetry-api ~= 1.28.1", "opentelemetry-instrumentation ~= 0.49b1", "opentelemetry-semantic-conventions ~= 0.49b1", + "wrapt >= 1.0.0, < 2.0.0", ] [project.readme] @@ -42,7 +43,7 @@ Homepage = "https://github.com/elastic/elastic-otel-python-instrumentations" [project.optional-dependencies] dev = ["pytest", "pip-tools", "openai", "numpy", "opentelemetry-test-utils", "vcrpy", "pytest-asyncio", "pytest-vcr"] instruments = [ - "openai >= 1.0.0", + "openai >= 1.2.0", ] [project.entry-points.opentelemetry_instrumentor] diff --git a/instrumentation/elastic-opentelemetry-instrumentation-openai/src/opentelemetry/instrumentation/openai/package.py b/instrumentation/elastic-opentelemetry-instrumentation-openai/src/opentelemetry/instrumentation/openai/package.py index d034f6e..be9cf1c 100644 --- 
a/instrumentation/elastic-opentelemetry-instrumentation-openai/src/opentelemetry/instrumentation/openai/package.py +++ b/instrumentation/elastic-opentelemetry-instrumentation-openai/src/opentelemetry/instrumentation/openai/package.py @@ -14,4 +14,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -_instruments = ("openai >= 1.0.0",) +_instruments = ("openai >= 1.2.0",) diff --git a/instrumentation/elastic-opentelemetry-instrumentation-openai/src/opentelemetry/instrumentation/openai/wrappers.py b/instrumentation/elastic-opentelemetry-instrumentation-openai/src/opentelemetry/instrumentation/openai/wrappers.py index d7a2acc..cf4cfa9 100644 --- a/instrumentation/elastic-opentelemetry-instrumentation-openai/src/opentelemetry/instrumentation/openai/wrappers.py +++ b/instrumentation/elastic-opentelemetry-instrumentation-openai/src/opentelemetry/instrumentation/openai/wrappers.py @@ -104,7 +104,9 @@ def end(self, exc=None): def process_chunk(self, chunk): self.response_id = chunk.id self.model = chunk.model - self.usage = chunk.usage + # usage with streaming is available since 1.26.0 + if hasattr(chunk, "usage"): + self.usage = chunk.usage # with `include_usage` in `stream_options` we will get a last chunk without choices if chunk.choices: self.choices += chunk.choices diff --git a/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/test_chat_completions.py b/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/test_chat_completions.py index 2b0c344..50a4954 100644 --- a/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/test_chat_completions.py +++ b/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/test_chat_completions.py @@ -48,6 +48,8 @@ ) from .utils import get_sorted_metrics, logrecords_from_logs +OPENAI_VERSION = tuple([int(x) for x in openai.version.VERSION.split(".")]) + providers = ["openai_provider_chat_completions", "ollama_provider_chat_completions", "azure_provider_chat_completions"] @@ -1288,7 +1290,7 @@ def test_stream( ), ] - +@pytest.mark.skipif(OPENAI_VERSION < (1, 26, 0), reason="stream_options added in 1.26.0") @pytest.mark.vcr() @pytest.mark.parametrize( "provider_str,model,response_model,content,response_id,input_tokens,output_tokens,duration", From 45df683803d246a0a9d7f2c128c5078ea83fe39c Mon Sep 17 00:00:00 2001 From: Riccardo Magliocchetti Date: Thu, 14 Nov 2024 15:51:48 +0100 Subject: [PATCH 2/4] elastic-opentelemetry-instrumentation-openai: add openai integration tests This adds a bunch of integration tests hitting OpenAI. This requires setting an `OPENAI_API_KEY` variable in GitHub secrets. The tests themselves are copies of the recorded ones, asserting values from the response data.
Integration tests are not run by default and can be run with `pytest --integration-tests`. --- .github/workflows/ci.yml | 6 + .../README.md | 6 + .../tests/conftest.py | 23 + .../tests/test_chat_completions.py | 418 +++++++++++++++++- .../tests/test_embeddings.py | 95 +++- .../tests/utils.py | 20 +- 6 files changed, 562 insertions(+), 6 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4ae2add..3909c62 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -72,3 +72,9 @@ jobs: working-directory: ${{ env.working_dir }} - run: pytest working-directory: ${{ env.working_dir }} + - if: ${{ env[matrix.python-version] == '3.12' && !env[matrix.openai-version] }} + # Only run on latest python and openai client version because we are calling openai + run: pytest --integration-tests + working-directory: ${{ env.working_dir }} + env: + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} diff --git a/instrumentation/elastic-opentelemetry-instrumentation-openai/README.md b/instrumentation/elastic-opentelemetry-instrumentation-openai/README.md index ce8d9b9..eb6832d 100644 --- a/instrumentation/elastic-opentelemetry-instrumentation-openai/README.md +++ b/instrumentation/elastic-opentelemetry-instrumentation-openai/README.md @@ -71,6 +71,12 @@ pip install -r dev-requirements.txt pytest ``` +To run integration tests doing real requests you need a valid OpenAI API key: + +``` +OPENAI_API_KEY=<your-api-key> pytest --integration-tests +``` + ## Refreshing HTTP payloads We use [VCR.py](https://vcrpy.readthedocs.io/en/latest/) to automatically record HTTP responses from diff --git a/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/conftest.py b/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/conftest.py index 9e42125..e3b2437 100644 --- a/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/conftest.py +++ b/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/conftest.py @@ -498,3 +498,26 @@ def assert_token_usage_metric( ), ], ) + + +def pytest_addoption(parser): + parser.addoption( + "--integration-tests", + action="store_true", + default=False, + help="run integration tests doing real requests", + ) + + +def pytest_configure(config): + config.addinivalue_line("markers", "integration: mark integration tests") + + +def pytest_collection_modifyitems(config, items): + run_integration_tests = bool(config.getoption("integration_tests")) + reason = "running integration tests only" if run_integration_tests else "skipping integration tests" + skip_mark = pytest.mark.skip(reason=reason) + for item in items: + test_is_integration = "integration" in item.keywords + if run_integration_tests != test_is_integration: + item.add_marker(skip_mark) diff --git a/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/test_chat_completions.py b/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/test_chat_completions.py index 50a4954..26bd9a3 100644 --- a/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/test_chat_completions.py +++ b/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/test_chat_completions.py @@ -46,7 +46,7 @@ assert_operation_duration_metric, assert_token_usage_metric, ) -from .utils import get_sorted_metrics, logrecords_from_logs +from .utils import MOCK_POSITIVE_FLOAT, get_sorted_metrics, logrecords_from_logs OPENAI_VERSION = tuple([int(x) for x in openai.version.VERSION.split(".")]) providers = ["openai_provider_chat_completions", "ollama_provider_chat_completions", "azure_provider_chat_completions"] @@ -874,6 +874,146 @@ def test_tools_with_capture_content_log_events( )
+@pytest.mark.integration +@pytest.mark.parametrize( + "provider_str,model,response_model", + [ + ( + "openai_provider_chat_completions", + "gpt-4o-mini", + "gpt-4o-mini-2024-07-18", + ), + ], +) +def test_tools_with_capture_content_log_events_integration( + provider_str, + model, + response_model, + trace_exporter, + logs_exporter, + metrics_reader, + request, +): + provider = request.getfixturevalue(provider_str) + client = provider.get_client() + + # Redo the instrumentation dance to be affected by the environment variable + OpenAIInstrumentor().uninstrument() + with mock.patch.dict( + "os.environ", {"ELASTIC_OTEL_GENAI_CAPTURE_CONTENT": "true", "ELASTIC_OTEL_GENAI_EVENTS": "log"} + ): + OpenAIInstrumentor().instrument() + + tools = [ + { + "type": "function", + "function": { + "name": "get_delivery_date", + "description": "Get the delivery date for a customer's order. Call this whenever you need to know the delivery date, for example when a customer asks 'Where is my package'", + "parameters": { + "type": "object", + "properties": { + "order_id": { + "type": "string", + "description": "The customer's order ID.", + }, + }, + "required": ["order_id"], + "additionalProperties": False, + }, + }, + } + ] + + messages = [ + { + "role": "system", + "content": "You are a helpful customer support assistant. Use the supplied tools to assist the user.", + }, + {"role": "user", "content": "Hi, can you tell me the delivery date for my order?"}, + { + "role": "assistant", + "content": "Hi there! I can help with that. Can you please provide your order ID?", + }, + {"role": "user", "content": "i think it is order_12345"}, + ] + + response = client.chat.completions.create(model=model, messages=messages, tools=tools) + tool_call = response.choices[0].message.tool_calls[0] + assert tool_call.function.name == "get_delivery_date" + assert json.loads(tool_call.function.arguments) == {"order_id": "order_12345"} + + spans = trace_exporter.get_finished_spans() + assert len(spans) == 1 + + span = spans[0] + assert span.name == f"chat {model}" + assert span.kind == SpanKind.CLIENT + assert span.status.status_code == StatusCode.UNSET + + assert dict(span.attributes) == { + GEN_AI_OPERATION_NAME: "chat", + GEN_AI_REQUEST_MODEL: model, + GEN_AI_SYSTEM: "openai", + GEN_AI_RESPONSE_ID: response.id, + GEN_AI_RESPONSE_MODEL: response_model, + GEN_AI_RESPONSE_FINISH_REASONS: ("tool_calls",), + GEN_AI_USAGE_INPUT_TOKENS: response.usage.prompt_tokens, + GEN_AI_USAGE_OUTPUT_TOKENS: response.usage.completion_tokens, + SERVER_ADDRESS: provider.server_address, + SERVER_PORT: provider.server_port, + } + + logs = logs_exporter.get_finished_logs() + assert len(logs) == 5 + log_records = logrecords_from_logs(logs) + system_message, user_message, assistant_message, second_user_message, choice = log_records + assert system_message.attributes == {"gen_ai.system": "openai", "event.name": "gen_ai.system.message"} + assert system_message.body == { + "content": "You are a helpful customer support assistant. Use the supplied tools to assist the user." + } + assert user_message.attributes == {"gen_ai.system": "openai", "event.name": "gen_ai.user.message"} + assert user_message.body == {"content": "Hi, can you tell me the delivery date for my order?"} + assert assistant_message.attributes == {"gen_ai.system": "openai", "event.name": "gen_ai.assistant.message"} + assert assistant_message.body == { + "content": "Hi there! I can help with that. Can you please provide your order ID?" 
+ } + assert second_user_message.attributes == {"gen_ai.system": "openai", "event.name": "gen_ai.user.message"} + assert second_user_message.body == {"content": "i think it is order_12345"} + assert choice.attributes == {"gen_ai.system": "openai", "event.name": "gen_ai.choice"} + + expected_body = { + "finish_reason": "tool_calls", + "index": 0, + "message": { + "tool_calls": [ + { + "function": {"arguments": '{"order_id":"order_12345"}', "name": "get_delivery_date"}, + "id": tool_call.id, + "type": "function", + }, + ], + }, + } + assert dict(choice.body) == expected_body + + operation_duration_metric, token_usage_metric = get_sorted_metrics(metrics_reader) + attributes = { + GEN_AI_REQUEST_MODEL: model, + GEN_AI_RESPONSE_MODEL: response_model, + } + assert_operation_duration_metric( + provider, operation_duration_metric, attributes=attributes, min_data_point=MOCK_POSITIVE_FLOAT + ) + assert_token_usage_metric( + provider, + token_usage_metric, + attributes=attributes, + input_data_point=response.usage.prompt_tokens, + output_data_point=response.usage.completion_tokens, + ) + + test_connection_error_test_data = [ ( "openai_provider_chat_completions", @@ -1059,6 +1199,92 @@ def test_basic_with_capture_content( ) +@pytest.mark.integration +@pytest.mark.parametrize( + "provider_str,model,response_model", + [ + ( + "openai_provider_chat_completions", + "gpt-4o-mini", + "gpt-4o-mini-2024-07-18", + ) + ], +) +def test_basic_with_capture_content_integration( + provider_str, + model, + response_model, + trace_exporter, + metrics_reader, + request, +): + provider = request.getfixturevalue(provider_str) + + # Redo the instrumentation dance to be affected by the environment variable + OpenAIInstrumentor().uninstrument() + with mock.patch.dict("os.environ", {"ELASTIC_OTEL_GENAI_CAPTURE_CONTENT": "true"}): + OpenAIInstrumentor().instrument() + + client = provider.get_client() + + messages = [ + { + "role": "user", + "content": "Answer in up to 3 words: Which ocean contains the falkland islands?", + } + ] + + response = client.chat.completions.create(model=model, messages=messages) + content = response.choices[0].message.content + assert content + + spans = trace_exporter.get_finished_spans() + assert len(spans) == 1 + + span = spans[0] + assert span.name == f"chat {model}" + assert span.kind == SpanKind.CLIENT + assert span.status.status_code == StatusCode.UNSET + + assert dict(span.attributes) == { + GEN_AI_OPERATION_NAME: "chat", + GEN_AI_REQUEST_MODEL: model, + GEN_AI_SYSTEM: "openai", + GEN_AI_RESPONSE_ID: response.id, + GEN_AI_RESPONSE_MODEL: response_model, + GEN_AI_RESPONSE_FINISH_REASONS: ("stop",), + GEN_AI_USAGE_INPUT_TOKENS: response.usage.prompt_tokens, + GEN_AI_USAGE_OUTPUT_TOKENS: response.usage.completion_tokens, + SERVER_ADDRESS: provider.server_address, + SERVER_PORT: provider.server_port, + } + + assert len(span.events) == 2 + prompt_event, completion_event = span.events + assert prompt_event.name == "gen_ai.content.prompt" + assert dict(prompt_event.attributes) == {"gen_ai.prompt": json.dumps(messages)} + assert completion_event.name == "gen_ai.content.completion" + assert dict(completion_event.attributes) == { + "gen_ai.completion": '[{"role": "assistant", "content": "' + content + '"}]' + } + + operation_duration_metric, token_usage_metric = get_sorted_metrics(metrics_reader) + attributes = { + GEN_AI_REQUEST_MODEL: model, + GEN_AI_RESPONSE_MODEL: response_model, + } + assert_operation_duration_metric( + provider, operation_duration_metric, attributes=attributes, 
min_data_point=MOCK_POSITIVE_FLOAT + ) + assert_token_usage_metric( + provider, + token_usage_metric, + attributes=attributes, + input_data_point=response.usage.prompt_tokens, + output_data_point=response.usage.completion_tokens, + ) + + test_basic_with_capture_content_log_events_test_data = [ ( "openai_provider_chat_completions", @@ -1290,6 +1516,7 @@ def test_stream( ), ] + @pytest.mark.skipif(OPENAI_VERSION < (1, 26, 0), reason="stream_options added in 1.26.0") @pytest.mark.vcr() @pytest.mark.parametrize( @@ -1365,6 +1592,98 @@ def test_stream_with_include_usage_option( ) +@pytest.mark.skipif(OPENAI_VERSION < (1, 26, 0), reason="stream_options added in 1.26.0") +@pytest.mark.integration +@pytest.mark.parametrize( + "provider_str,model,response_model", + [ + ( + "openai_provider_chat_completions", + "gpt-4o-mini", + "gpt-4o-mini-2024-07-18", + ) + ], +) +def test_stream_with_include_usage_option_and_capture_content_integration( + provider_str, + model, + response_model, + trace_exporter, + metrics_reader, + request, +): + provider = request.getfixturevalue(provider_str) + + # Redo the instrumentation dance to be affected by the environment variable + OpenAIInstrumentor().uninstrument() + with mock.patch.dict("os.environ", {"ELASTIC_OTEL_GENAI_CAPTURE_CONTENT": "true"}): + OpenAIInstrumentor().instrument() + + client = provider.get_client() + + messages = [ + { + "role": "user", + "content": "Answer in up to 3 words: Which ocean contains the falkland islands?", + } + ] + + response = client.chat.completions.create( + model=model, messages=messages, stream=True, stream_options={"include_usage": True} + ) + chunks = [chunk for chunk in response] + usage = chunks[-1].usage + + chunks_content = [chunk.choices[0].delta.content or "" for chunk in chunks if chunk.choices] + content = "".join(chunks_content) + assert content + + spans = trace_exporter.get_finished_spans() + assert len(spans) == 1 + + span = spans[0] + assert span.name == f"chat {model}" + assert span.kind == SpanKind.CLIENT + assert span.status.status_code == StatusCode.UNSET + + assert dict(span.attributes) == { + GEN_AI_OPERATION_NAME: "chat", + GEN_AI_REQUEST_MODEL: model, + GEN_AI_SYSTEM: "openai", + GEN_AI_RESPONSE_ID: chunks[0].id, + GEN_AI_RESPONSE_MODEL: response_model, + GEN_AI_RESPONSE_FINISH_REASONS: ("stop",), + GEN_AI_USAGE_INPUT_TOKENS: usage.prompt_tokens, + GEN_AI_USAGE_OUTPUT_TOKENS: usage.completion_tokens, + SERVER_ADDRESS: provider.server_address, + SERVER_PORT: provider.server_port, + } + assert len(span.events) == 2 + prompt_event, completion_event = span.events + assert prompt_event.name == "gen_ai.content.prompt" + assert dict(prompt_event.attributes) == {"gen_ai.prompt": json.dumps(messages)} + assert completion_event.name == "gen_ai.content.completion" + assert dict(completion_event.attributes) == { + "gen_ai.completion": '[{"role": "assistant", "content": "' + content + '"}]' + } + + operation_duration_metric, token_usage_metric = get_sorted_metrics(metrics_reader) + attributes = { + GEN_AI_REQUEST_MODEL: model, + GEN_AI_RESPONSE_MODEL: response_model, + } + assert_operation_duration_metric( + provider, operation_duration_metric, attributes=attributes, min_data_point=MOCK_POSITIVE_FLOAT + ) + assert_token_usage_metric( + provider, + token_usage_metric, + attributes=attributes, + input_data_point=usage.prompt_tokens, + output_data_point=usage.completion_tokens, + ) + + test_stream_with_tools_and_capture_content_test_data = [ ( "openai_provider_chat_completions", @@ -2553,6 +2872,103 @@ async def 
test_async_basic_with_capture_content_log_events( ) +@pytest.mark.integration +@pytest.mark.asyncio +@pytest.mark.parametrize( + "provider_str,model,response_model", + [ + ( + "openai_provider_chat_completions", + "gpt-4o-mini", + "gpt-4o-mini-2024-07-18", + ), + ], +) +async def test_async_basic_with_capture_content_log_events_integration( + provider_str, + model, + response_model, + trace_exporter, + logs_exporter, + metrics_reader, + request, +): + provider = request.getfixturevalue(provider_str) + client = provider.get_async_client() + + # Redo the instrumentation dance to be affected by the environment variable + OpenAIInstrumentor().uninstrument() + with mock.patch.dict( + "os.environ", {"ELASTIC_OTEL_GENAI_CAPTURE_CONTENT": "true", "ELASTIC_OTEL_GENAI_EVENTS": "log"} + ): + OpenAIInstrumentor().instrument() + + messages = [ + { + "role": "user", + "content": "Answer in up to 3 words: Which ocean contains the falkland islands?", + } + ] + + response = await client.chat.completions.create(model=model, messages=messages) + content = response.choices[0].message.content + assert content + + spans = trace_exporter.get_finished_spans() + assert len(spans) == 1 + + span = spans[0] + assert span.name == f"chat {model}" + assert span.kind == SpanKind.CLIENT + assert span.status.status_code == StatusCode.UNSET + + assert dict(span.attributes) == { + GEN_AI_OPERATION_NAME: "chat", + GEN_AI_REQUEST_MODEL: model, + GEN_AI_SYSTEM: "openai", + GEN_AI_RESPONSE_ID: response.id, + GEN_AI_RESPONSE_MODEL: response_model, + GEN_AI_RESPONSE_FINISH_REASONS: ("stop",), + GEN_AI_USAGE_INPUT_TOKENS: response.usage.prompt_tokens, + GEN_AI_USAGE_OUTPUT_TOKENS: response.usage.completion_tokens, + SERVER_ADDRESS: provider.server_address, + SERVER_PORT: provider.server_port, + } + + logs = logs_exporter.get_finished_logs() + assert len(logs) == 2 + log_records = logrecords_from_logs(logs) + user_message, choice = log_records + assert user_message.attributes == {"gen_ai.system": "openai", "event.name": "gen_ai.user.message"} + assert user_message.body == {"content": "Answer in up to 3 words: Which ocean contains the falkland islands?"} + assert choice.attributes == {"gen_ai.system": "openai", "event.name": "gen_ai.choice"} + + expected_body = { + "finish_reason": "stop", + "index": 0, + "message": { + "content": content, + }, + } + assert dict(choice.body) == expected_body + + operation_duration_metric, token_usage_metric = get_sorted_metrics(metrics_reader) + attributes = { + GEN_AI_REQUEST_MODEL: model, + GEN_AI_RESPONSE_MODEL: response_model, + } + assert_operation_duration_metric( + provider, operation_duration_metric, attributes=attributes, min_data_point=MOCK_POSITIVE_FLOAT + ) + assert_token_usage_metric( + provider, + token_usage_metric, + attributes=attributes, + input_data_point=response.usage.prompt_tokens, + output_data_point=response.usage.completion_tokens, + ) + + test_async_stream_test_data = [ ( "openai_provider_chat_completions", diff --git a/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/test_embeddings.py b/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/test_embeddings.py index b86183a..ade2706 100644 --- a/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/test_embeddings.py +++ b/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/test_embeddings.py @@ -35,7 +35,7 @@ assert_operation_duration_metric, assert_token_usage_input_metric, ) -from .utils import get_sorted_metrics +from .utils import MOCK_POSITIVE_FLOAT, 
get_sorted_metrics test_basic_test_data = [ @@ -69,7 +69,7 @@ def test_basic(provider_str, model, input_tokens, duration, trace_exporter, metr GEN_AI_REQUEST_MODEL: model, GEN_AI_SYSTEM: "openai", GEN_AI_RESPONSE_MODEL: model, - GEN_AI_USAGE_INPUT_TOKENS: 4, + GEN_AI_USAGE_INPUT_TOKENS: input_tokens, SERVER_ADDRESS: provider.server_address, SERVER_PORT: provider.server_port, } @@ -118,7 +118,7 @@ def test_all_the_client_options(provider_str, model, input_tokens, duration, tra GEN_AI_SYSTEM: "openai", GEN_AI_RESPONSE_MODEL: model, GEN_AI_REQUEST_ENCODING_FORMAT: "float", - GEN_AI_USAGE_INPUT_TOKENS: 4, + GEN_AI_USAGE_INPUT_TOKENS: input_tokens, SERVER_ADDRESS: provider.server_address, SERVER_PORT: provider.server_port, } @@ -135,6 +135,50 @@ def test_all_the_client_options(provider_str, model, input_tokens, duration, tra assert_token_usage_input_metric(provider, token_usage_metric, attributes=attributes, input_data_point=4) +@pytest.mark.integration +@pytest.mark.parametrize("provider_str,model", [("openai_provider_embeddings", "text-embedding-3-small")]) +def test_all_the_client_options_integration(provider_str, model, trace_exporter, metrics_reader, request): + provider = request.getfixturevalue(provider_str) + client = provider.get_client() + + text = "South Atlantic Ocean." + response = client.embeddings.create(model=model, input=[text], encoding_format="float") + + assert len(response.data) == 1 + + spans = trace_exporter.get_finished_spans() + assert len(spans) == 1 + + span = spans[0] + assert span.name == f"embeddings {model}" + assert span.kind == SpanKind.CLIENT + assert span.status.status_code == StatusCode.UNSET + + assert dict(span.attributes) == { + GEN_AI_OPERATION_NAME: provider.operation_name, + GEN_AI_REQUEST_MODEL: model, + GEN_AI_SYSTEM: "openai", + GEN_AI_RESPONSE_MODEL: model, + GEN_AI_REQUEST_ENCODING_FORMAT: "float", + GEN_AI_USAGE_INPUT_TOKENS: response.usage.prompt_tokens, + SERVER_ADDRESS: provider.server_address, + SERVER_PORT: provider.server_port, + } + assert span.events == () + + operation_duration_metric, token_usage_metric = get_sorted_metrics(metrics_reader) + attributes = { + GEN_AI_REQUEST_MODEL: model, + GEN_AI_RESPONSE_MODEL: model, + } + assert_operation_duration_metric( + provider, operation_duration_metric, attributes=attributes, min_data_point=MOCK_POSITIVE_FLOAT + ) + assert_token_usage_input_metric( + provider, token_usage_metric, attributes=attributes, input_data_point=response.usage.prompt_tokens + ) + + test_connection_error_data = [ ("openai_provider_embeddings", "text-embedding-3-small", 0.460242404602468), ("azure_provider_embeddings", "text-embedding-3-small", 0.4328950522467494), @@ -286,6 +330,51 @@ async def test_async_all_the_client_options( assert_token_usage_input_metric(provider, token_usage_metric, attributes=attributes, input_data_point=input_tokens) +@pytest.mark.integration +@pytest.mark.asyncio +@pytest.mark.parametrize("provider_str,model", [("openai_provider_embeddings", "text-embedding-3-small")]) +async def test_async_all_the_client_options_integration(provider_str, model, trace_exporter, metrics_reader, request): + provider = request.getfixturevalue(provider_str) + client = provider.get_async_client() + + text = "South Atlantic Ocean." 
+ response = await client.embeddings.create(model=model, input=[text], encoding_format="float") + + assert len(response.data) == 1 + + spans = trace_exporter.get_finished_spans() + assert len(spans) == 1 + + span = spans[0] + assert span.name == f"embeddings {model}" + assert span.kind == SpanKind.CLIENT + assert span.status.status_code == StatusCode.UNSET + + assert dict(span.attributes) == { + GEN_AI_OPERATION_NAME: provider.operation_name, + GEN_AI_REQUEST_MODEL: model, + GEN_AI_SYSTEM: "openai", + GEN_AI_RESPONSE_MODEL: model, + GEN_AI_REQUEST_ENCODING_FORMAT: "float", + GEN_AI_USAGE_INPUT_TOKENS: response.usage.prompt_tokens, + SERVER_ADDRESS: provider.server_address, + SERVER_PORT: provider.server_port, + } + assert span.events == () + + operation_duration_metric, token_usage_metric = get_sorted_metrics(metrics_reader) + attributes = { + GEN_AI_REQUEST_MODEL: model, + GEN_AI_RESPONSE_MODEL: model, + } + assert_operation_duration_metric( + provider, operation_duration_metric, attributes=attributes, min_data_point=MOCK_POSITIVE_FLOAT + ) + assert_token_usage_input_metric( + provider, token_usage_metric, attributes=attributes, input_data_point=response.usage.prompt_tokens + ) + + test_async_connection_error_test_data = [ ("openai_provider_embeddings", "text-embedding-3-small", 0.2263190783560276), ("azure_provider_embeddings", "text-embedding-3-small", 0.0036478489999751673), diff --git a/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/utils.py b/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/utils.py index 61bf75a..02cf46c 100644 --- a/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/utils.py +++ b/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/utils.py @@ -13,6 +13,7 @@ # limitations under the License. 
from typing import Mapping, Optional, Sequence +from unittest import mock from opentelemetry.sdk._logs._internal import LogData from opentelemetry.sdk.metrics._internal.point import Metric @@ -69,9 +70,16 @@ def is_data_points_equal( values_diff = None if isinstance(data_point, NumberDataPoint): - values_diff = abs(expected_data_point.value - data_point.value) + if data_point.value is mock.ANY or data_point.value is MOCK_POSITIVE_FLOAT: + values_diff = 0 + else: + values_diff = abs(expected_data_point.value - data_point.value) elif isinstance(data_point, HistogramDataPoint): - values_diff = abs(expected_data_point.sum - data_point.sum) + if expected_data_point.sum is mock.ANY or expected_data_point.sum is MOCK_POSITIVE_FLOAT: + values_diff = 0 + else: + values_diff = abs(expected_data_point.sum - data_point.sum) + if expected_data_point.count != data_point.count or ( est_value_delta == 0 and (expected_data_point.min != data_point.min or expected_data_point.max != data_point.max) @@ -123,3 +131,11 @@ def create_histogram_data_point(sum_data_point, count, max_data_point, min_data_ def logrecords_from_logs(logs: Sequence[LogData]) -> Sequence[Mapping[str, AttributeValue]]: return [log.log_record for log in logs] + + +class MockPositiveFloat: + def __eq__(self, other): + return isinstance(other, float) and other > 0.0 + + +MOCK_POSITIVE_FLOAT = MockPositiveFloat() From 210ace48ec3fc74dee4230dfc23f501e6a420085 Mon Sep 17 00:00:00 2001 From: Riccardo Magliocchetti Date: Thu, 14 Nov 2024 16:20:47 +0100 Subject: [PATCH 3/4] Please azure timings --- .../tests/test_embeddings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/test_embeddings.py b/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/test_embeddings.py index ade2706..de00565 100644 --- a/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/test_embeddings.py +++ b/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/test_embeddings.py @@ -377,7 +377,7 @@ async def test_async_all_the_client_options_integration(provider_str, model, tra test_async_connection_error_test_data = [ ("openai_provider_embeddings", "text-embedding-3-small", 0.2263190783560276), - ("azure_provider_embeddings", "text-embedding-3-small", 0.0036478489999751673), + ("azure_provider_embeddings", "text-embedding-3-small", 1.0062104639999916), ("ollama_provider_embeddings", "all-minilm:33m", 0.0030461717396974564), ] From 0daf9dc598ef8307b1ae4d9717ae990d571d3e00 Mon Sep 17 00:00:00 2001 From: Riccardo Magliocchetti Date: Thu, 14 Nov 2024 16:28:33 +0100 Subject: [PATCH 4/4] Please also ollama timings --- .../tests/test_embeddings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/test_embeddings.py b/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/test_embeddings.py index de00565..04d3d0b 100644 --- a/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/test_embeddings.py +++ b/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/test_embeddings.py @@ -378,7 +378,7 @@ async def test_async_all_the_client_options_integration(provider_str, model, tra test_async_connection_error_test_data = [ ("openai_provider_embeddings", "text-embedding-3-small", 0.2263190783560276), ("azure_provider_embeddings", "text-embedding-3-small", 1.0062104639999916), - ("ollama_provider_embeddings", "all-minilm:33m", 0.0030461717396974564),
+ ("ollama_provider_embeddings", "all-minilm:33m", 1.0148218229999770), ]