From bca25e5d7adbc8abc37100775f9112c8565e01ac Mon Sep 17 00:00:00 2001 From: Adrian Cole <64215+codefromthecrypt@users.noreply.github.com> Date: Tue, 17 Dec 2024 12:17:12 +0100 Subject: [PATCH 1/5] elastic-opentelemetry-instrumentation-openai: add examples --- .../README.md | 7 +- .../examples/README.md | 71 +++++++++++++++++++ .../examples/chat.py | 25 +++++++ .../examples/embeddings.py | 49 +++++++++++++ .../examples/requirements.txt | 2 + 5 files changed, 152 insertions(+), 2 deletions(-) create mode 100644 instrumentation/elastic-opentelemetry-instrumentation-openai/examples/README.md create mode 100644 instrumentation/elastic-opentelemetry-instrumentation-openai/examples/chat.py create mode 100644 instrumentation/elastic-opentelemetry-instrumentation-openai/examples/embeddings.py create mode 100644 instrumentation/elastic-opentelemetry-instrumentation-openai/examples/requirements.txt diff --git a/instrumentation/elastic-opentelemetry-instrumentation-openai/README.md b/instrumentation/elastic-opentelemetry-instrumentation-openai/README.md index 1bf58ae..9b68588 100644 --- a/instrumentation/elastic-opentelemetry-instrumentation-openai/README.md +++ b/instrumentation/elastic-opentelemetry-instrumentation-openai/README.md @@ -23,10 +23,13 @@ pip install elastic-opentelemetry-instrumentation-openai This instrumentation supports *zero-code* / *autoinstrumentation*: ``` -opentelemetry-instrument python use_openai.py +# "examples/chat.py" is a simple script using the openai client library. +cd examples + +opentelemetry-instrument python chat.py # You can record more information about prompts as log events by enabling content capture. 
-OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT=true opentelemetry-instrument python use_openai.py +OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT=true opentelemetry-instrument python chat.py ``` Or manual instrumentation: diff --git a/instrumentation/elastic-opentelemetry-instrumentation-openai/examples/README.md b/instrumentation/elastic-opentelemetry-instrumentation-openai/examples/README.md new file mode 100644 index 0000000..a7b756e --- /dev/null +++ b/instrumentation/elastic-opentelemetry-instrumentation-openai/examples/README.md @@ -0,0 +1,71 @@ +# OpenAI Zero-Code Instrumentation Examples + +This is an example of how to instrument OpenAI calls with zero code changes, +using `opentelemetry-instrument` included in the Elastic Distribution of +OpenTelemetry Python ([EDOT Python][edot-python]). + +When OpenAI examples run, they export traces, metrics and logs to an OTLP +compatible endpoint. Traces and metrics include details such as the model used +and the duration of the LLM request. In the case of chat, logs capture the +request and the generated response. The combination of these provides a +comprehensive view of the performance and behavior of your OpenAI usage. + +## Install + +First, set up a Python virtual environment like this: +```bash +python3 -m venv .venv +source .venv/bin/activate +pip install -r requirements.txt +``` + +Next, install [EDOT Python][edot-python] and dotenv, which is a portable way to +load environment variables. +```bash +pip install "python-dotenv[cli]" elastic-opentelemetry +``` + +Finally, run `edot-bootstrap` which analyzes the code to add relevant +instrumentation, to record traces, metrics and logs. +```bash +edot-bootstrap --action=install +``` + +## Configure + +Minimally, update the [.env](.env) file with your `OPENAI_API_KEY`. + +An OTLP compatible endpoint should be listening for traces, metrics and logs on +`http://localhost:4317`. If not, update `OTEL_EXPORTER_OTLP_ENDPOINT` as well. 
+ +## Run + +There are two examples, and they run the same way: + +### Chat + +[chat.py](chat.py) asks the LLM a geography question and prints the response. + +Run it like this: +```bash +dotenv run -- opentelemetry-instrument python chat.py +``` + +You should see something like "Atlantic Ocean" unless your LLM hallucinates! + +### Embeddings + + +[embeddings.py](embeddings.py) creates in-memory VectorDB embeddings about +Elastic products. Then, it searches for one similar to a question. + +Run it like this: +```bash +dotenv run -- opentelemetry-instrument python embeddings.py +``` + +You should see something like "Connectors" unless your LLM hallucinates! + +--- + +[edot-python]: https://github.com/elastic/elastic-otel-python/blob/main/docs/get-started.md diff --git a/instrumentation/elastic-opentelemetry-instrumentation-openai/examples/chat.py b/instrumentation/elastic-opentelemetry-instrumentation-openai/examples/chat.py new file mode 100644 index 0000000..d753be2 --- /dev/null +++ b/instrumentation/elastic-opentelemetry-instrumentation-openai/examples/chat.py @@ -0,0 +1,25 @@ +#!/usr/bin/env python + +import os + +import openai + +CHAT_MODEL = os.environ.get("CHAT_MODEL", "gpt-4o-mini") + + +def main(): + client = openai.Client() + + messages = [ + { + "role": "user", + "content": "Answer in up to 3 words: Which ocean contains the falkland islands?", + } + ] + + chat_completion = client.chat.completions.create(model=CHAT_MODEL, messages=messages) + print(chat_completion.choices[0].message.content) + + +if __name__ == "__main__": + main() diff --git a/instrumentation/elastic-opentelemetry-instrumentation-openai/examples/embeddings.py b/instrumentation/elastic-opentelemetry-instrumentation-openai/examples/embeddings.py new file mode 100644 index 0000000..75f8ea7 --- /dev/null +++ b/instrumentation/elastic-opentelemetry-instrumentation-openai/examples/embeddings.py @@ -0,0 +1,49 @@ +import os + +import numpy as np +import openai + +EMBEDDINGS_MODEL = 
os.environ.get("EMBEDDINGS_MODEL", "text-embedding-3-small") + + +def main(): + client = openai.Client() + + products = [ + "Search: Ingest your data, and explore Elastic's machine learning and retrieval augmented generation (RAG) capabilities.", + "Observability: Unify your logs, metrics, traces, and profiling at scale in a single platform.", + "Security: Protect, investigate, and respond to cyber threats with AI-driven security analytics.", + "Elasticsearch: Distributed, RESTful search and analytics.", + "Kibana: Visualize your data. Navigate the Stack.", + "Beats: Collect, parse, and ship in a lightweight fashion.", + "Connectors: Connect popular databases, file systems, collaboration tools, and more.", + "Logstash: Ingest, transform, enrich, and output.", + ] + + # Generate embeddings for each product. Keep them in an array instead of a vector DB. + product_embeddings = [] + for product in products: + product_embeddings.append(create_embedding(client, product)) + + query_embedding = create_embedding(client, "What can help me connect to OpenAI?") + + # Calculate cosine similarity between the query and document embeddings + similarities = [] + for product_embedding in product_embeddings: + similarity = np.dot(query_embedding, product_embedding) / ( + np.linalg.norm(query_embedding) * np.linalg.norm(product_embedding) + ) + similarities.append(similarity) + + # Get the index of the most similar document + most_similar_index = np.argmax(similarities) + + print(products[most_similar_index]) + + +def create_embedding(client, text): + return client.embeddings.create(input=[text], model=EMBEDDINGS_MODEL, encoding_format="float").data[0].embedding + + +if __name__ == "__main__": + main() diff --git a/instrumentation/elastic-opentelemetry-instrumentation-openai/examples/requirements.txt b/instrumentation/elastic-opentelemetry-instrumentation-openai/examples/requirements.txt new file mode 100644 index 0000000..07f1660 --- /dev/null +++ 
b/instrumentation/elastic-opentelemetry-instrumentation-openai/examples/requirements.txt @@ -0,0 +1,2 @@ +openai~=1.57.2 +numpy~=2.2.0 From 2ac1ad774e8a89a9b92dcca3c4df822e284a9199 Mon Sep 17 00:00:00 2001 From: Riccardo Magliocchetti Date: Tue, 17 Dec 2024 12:21:23 +0100 Subject: [PATCH 2/5] scripts/license_headers_check.sh: ignore code examples --- scripts/license_headers_check.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/license_headers_check.sh b/scripts/license_headers_check.sh index d0a552d..3eb0165 100755 --- a/scripts/license_headers_check.sh +++ b/scripts/license_headers_check.sh @@ -17,7 +17,7 @@ if [ $# -eq 0 ] then - FILES=$(find . \( -name "*.py" -o -name "*.c" -o -name "*.sh" \) -size +1c -not -path "./dist/*" -not -path "./build/*" -not -path "./venv*/*") + FILES=$(find . \( -name "*.py" -o -name "*.c" -o -name "*.sh" \) -size +1c -not -path "./dist/*" -not -path "./build/*" -not -path "./venv*/*" -not -path "*/examples/*") else FILES=$@ fi From 45a407cd3462ffacc4908b6a037ee68ca2778a87 Mon Sep 17 00:00:00 2001 From: Riccardo Magliocchetti Date: Tue, 17 Dec 2024 15:29:40 +0100 Subject: [PATCH 3/5] Update instrumentation/elastic-opentelemetry-instrumentation-openai/examples/chat.py --- .../examples/chat.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/instrumentation/elastic-opentelemetry-instrumentation-openai/examples/chat.py b/instrumentation/elastic-opentelemetry-instrumentation-openai/examples/chat.py index d753be2..dab7862 100644 --- a/instrumentation/elastic-opentelemetry-instrumentation-openai/examples/chat.py +++ b/instrumentation/elastic-opentelemetry-instrumentation-openai/examples/chat.py @@ -1,5 +1,3 @@ -#!/usr/bin/env python - import os import openai From bee76b07f4cf53541fb6a6576a96b55d872f511c Mon Sep 17 00:00:00 2001 From: Riccardo Magliocchetti Date: Wed, 18 Dec 2024 09:54:32 +0100 Subject: [PATCH 4/5] Update instrumentation/elastic-opentelemetry-instrumentation-openai/examples/chat.py 
Co-authored-by: Adrian Cole <64215+codefromthecrypt@users.noreply.github.com> --- .../examples/chat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/instrumentation/elastic-opentelemetry-instrumentation-openai/examples/chat.py b/instrumentation/elastic-opentelemetry-instrumentation-openai/examples/chat.py index dab7862..40d6475 100644 --- a/instrumentation/elastic-opentelemetry-instrumentation-openai/examples/chat.py +++ b/instrumentation/elastic-opentelemetry-instrumentation-openai/examples/chat.py @@ -11,7 +11,7 @@ def main(): messages = [ { "role": "user", - "content": "Answer in up to 3 words: Which ocean contains the falkland islands?", + "content": "Answer in up to 3 words: Which ocean contains Bouvet Island?", } ] From d0d0953cdd404c2cac148df1e1ff347fda08cc9d Mon Sep 17 00:00:00 2001 From: Riccardo Magliocchetti Date: Wed, 18 Dec 2024 09:59:07 +0100 Subject: [PATCH 5/5] Ship a default .env and update README to ask to copy it --- .../examples/README.md | 2 +- .../examples/default.env | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) create mode 100644 instrumentation/elastic-opentelemetry-instrumentation-openai/examples/default.env diff --git a/instrumentation/elastic-opentelemetry-instrumentation-openai/examples/README.md b/instrumentation/elastic-opentelemetry-instrumentation-openai/examples/README.md index a7b756e..4b8bbeb 100644 --- a/instrumentation/elastic-opentelemetry-instrumentation-openai/examples/README.md +++ b/instrumentation/elastic-opentelemetry-instrumentation-openai/examples/README.md @@ -33,7 +33,7 @@ edot-bootstrap --action=install ## Configure -Minimally, update the [.env](.env) file with your `OPENAI_API_KEY`. +Copy [default.env](default.env) to `.env` and update the file with your `OPENAI_API_KEY`. An OTLP compatible endpoint should be listening for traces, metrics and logs on `http://localhost:4317`. If not, update `OTEL_EXPORTER_OTLP_ENDPOINT` as well. 
diff --git a/instrumentation/elastic-opentelemetry-instrumentation-openai/examples/default.env b/instrumentation/elastic-opentelemetry-instrumentation-openai/examples/default.env new file mode 100644 index 0000000..c2dda12 --- /dev/null +++ b/instrumentation/elastic-opentelemetry-instrumentation-openai/examples/default.env @@ -0,0 +1,19 @@ +# Update this with your real OpenAI API key +OPENAI_API_KEY=sk-YOUR_API_KEY + +# Uncomment to use Ollama instead of OpenAI +# OPENAI_BASE_URL=http://localhost:11434/v1 +# OPENAI_API_KEY=unused +# CHAT_MODEL=qwen2.5:0.5b +# EMBEDDINGS_MODEL=all-minilm:33m + +# Uncomment and change to your OTLP endpoint +# OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4317 +# OTEL_EXPORTER_OTLP_PROTOCOL=grpc + +OTEL_SERVICE_NAME=opentelemetry-python-openai + +# Change to 'false' to hide prompt and completion content +OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT=true +# Change to affect behavior of which resources are detected +OTEL_EXPERIMENTAL_RESOURCE_DETECTORS=process_runtime,os,otel,telemetry_distro \ No newline at end of file