diff --git a/.github/workflows/loongsuite_lint_0.yml b/.github/workflows/loongsuite_lint_0.yml index 49f79755f..445227447 100644 --- a/.github/workflows/loongsuite_lint_0.yml +++ b/.github/workflows/loongsuite_lint_0.yml @@ -108,6 +108,25 @@ jobs: - name: Run tests run: tox -c tox-loongsuite.ini -e lint-loongsuite-instrumentation-google-adk + lint-loongsuite-instrumentation-langchain: + name: LoongSuite loongsuite-instrumentation-langchain + runs-on: ubuntu-latest + timeout-minutes: 30 + steps: + - name: Checkout repo @ SHA - ${{ github.sha }} + uses: actions/checkout@v4 + + - name: Set up Python 3.13 + uses: actions/setup-python@v5 + with: + python-version: "3.13" + + - name: Install tox + run: pip install tox-uv + + - name: Run tests + run: tox -c tox-loongsuite.ini -e lint-loongsuite-instrumentation-langchain + lint-loongsuite-instrumentation-mem0: name: LoongSuite loongsuite-instrumentation-mem0 runs-on: ubuntu-latest diff --git a/.github/workflows/loongsuite_test_0.yml b/.github/workflows/loongsuite_test_0.yml index 1dd2ffc77..6b92314d7 100644 --- a/.github/workflows/loongsuite_test_0.yml +++ b/.github/workflows/loongsuite_test_0.yml @@ -716,6 +716,196 @@ jobs: - name: Run tests run: tox -c tox-loongsuite.ini -e py313-test-loongsuite-instrumentation-google-adk-latest -- -ra + py39-test-loongsuite-instrumentation-langchain-oldest_ubuntu-latest: + name: LoongSuite loongsuite-instrumentation-langchain-oldest 3.9 Ubuntu + runs-on: ubuntu-latest + timeout-minutes: 30 + steps: + - name: Checkout repo @ SHA - ${{ github.sha }} + uses: actions/checkout@v4 + + - name: Set up Python 3.9 + uses: actions/setup-python@v5 + with: + python-version: "3.9" + + - name: Install tox + run: pip install tox-uv + + - name: Run tests + run: tox -c tox-loongsuite.ini -e py39-test-loongsuite-instrumentation-langchain-oldest -- -ra + + py39-test-loongsuite-instrumentation-langchain-latest_ubuntu-latest: + name: LoongSuite loongsuite-instrumentation-langchain-latest 3.9 Ubuntu + runs-on: 
ubuntu-latest + timeout-minutes: 30 + steps: + - name: Checkout repo @ SHA - ${{ github.sha }} + uses: actions/checkout@v4 + + - name: Set up Python 3.9 + uses: actions/setup-python@v5 + with: + python-version: "3.9" + + - name: Install tox + run: pip install tox-uv + + - name: Run tests + run: tox -c tox-loongsuite.ini -e py39-test-loongsuite-instrumentation-langchain-latest -- -ra + + py310-test-loongsuite-instrumentation-langchain-oldest_ubuntu-latest: + name: LoongSuite loongsuite-instrumentation-langchain-oldest 3.10 Ubuntu + runs-on: ubuntu-latest + timeout-minutes: 30 + steps: + - name: Checkout repo @ SHA - ${{ github.sha }} + uses: actions/checkout@v4 + + - name: Set up Python 3.10 + uses: actions/setup-python@v5 + with: + python-version: "3.10" + + - name: Install tox + run: pip install tox-uv + + - name: Run tests + run: tox -c tox-loongsuite.ini -e py310-test-loongsuite-instrumentation-langchain-oldest -- -ra + + py310-test-loongsuite-instrumentation-langchain-latest_ubuntu-latest: + name: LoongSuite loongsuite-instrumentation-langchain-latest 3.10 Ubuntu + runs-on: ubuntu-latest + timeout-minutes: 30 + steps: + - name: Checkout repo @ SHA - ${{ github.sha }} + uses: actions/checkout@v4 + + - name: Set up Python 3.10 + uses: actions/setup-python@v5 + with: + python-version: "3.10" + + - name: Install tox + run: pip install tox-uv + + - name: Run tests + run: tox -c tox-loongsuite.ini -e py310-test-loongsuite-instrumentation-langchain-latest -- -ra + + py311-test-loongsuite-instrumentation-langchain-oldest_ubuntu-latest: + name: LoongSuite loongsuite-instrumentation-langchain-oldest 3.11 Ubuntu + runs-on: ubuntu-latest + timeout-minutes: 30 + steps: + - name: Checkout repo @ SHA - ${{ github.sha }} + uses: actions/checkout@v4 + + - name: Set up Python 3.11 + uses: actions/setup-python@v5 + with: + python-version: "3.11" + + - name: Install tox + run: pip install tox-uv + + - name: Run tests + run: tox -c tox-loongsuite.ini -e 
py311-test-loongsuite-instrumentation-langchain-oldest -- -ra + + py311-test-loongsuite-instrumentation-langchain-latest_ubuntu-latest: + name: LoongSuite loongsuite-instrumentation-langchain-latest 3.11 Ubuntu + runs-on: ubuntu-latest + timeout-minutes: 30 + steps: + - name: Checkout repo @ SHA - ${{ github.sha }} + uses: actions/checkout@v4 + + - name: Set up Python 3.11 + uses: actions/setup-python@v5 + with: + python-version: "3.11" + + - name: Install tox + run: pip install tox-uv + + - name: Run tests + run: tox -c tox-loongsuite.ini -e py311-test-loongsuite-instrumentation-langchain-latest -- -ra + + py312-test-loongsuite-instrumentation-langchain-oldest_ubuntu-latest: + name: LoongSuite loongsuite-instrumentation-langchain-oldest 3.12 Ubuntu + runs-on: ubuntu-latest + timeout-minutes: 30 + steps: + - name: Checkout repo @ SHA - ${{ github.sha }} + uses: actions/checkout@v4 + + - name: Set up Python 3.12 + uses: actions/setup-python@v5 + with: + python-version: "3.12" + + - name: Install tox + run: pip install tox-uv + + - name: Run tests + run: tox -c tox-loongsuite.ini -e py312-test-loongsuite-instrumentation-langchain-oldest -- -ra + + py312-test-loongsuite-instrumentation-langchain-latest_ubuntu-latest: + name: LoongSuite loongsuite-instrumentation-langchain-latest 3.12 Ubuntu + runs-on: ubuntu-latest + timeout-minutes: 30 + steps: + - name: Checkout repo @ SHA - ${{ github.sha }} + uses: actions/checkout@v4 + + - name: Set up Python 3.12 + uses: actions/setup-python@v5 + with: + python-version: "3.12" + + - name: Install tox + run: pip install tox-uv + + - name: Run tests + run: tox -c tox-loongsuite.ini -e py312-test-loongsuite-instrumentation-langchain-latest -- -ra + + py313-test-loongsuite-instrumentation-langchain-oldest_ubuntu-latest: + name: LoongSuite loongsuite-instrumentation-langchain-oldest 3.13 Ubuntu + runs-on: ubuntu-latest + timeout-minutes: 30 + steps: + - name: Checkout repo @ SHA - ${{ github.sha }} + uses: actions/checkout@v4 + + - 
name: Set up Python 3.13 + uses: actions/setup-python@v5 + with: + python-version: "3.13" + + - name: Install tox + run: pip install tox-uv + + - name: Run tests + run: tox -c tox-loongsuite.ini -e py313-test-loongsuite-instrumentation-langchain-oldest -- -ra + + py313-test-loongsuite-instrumentation-langchain-latest_ubuntu-latest: + name: LoongSuite loongsuite-instrumentation-langchain-latest 3.13 Ubuntu + runs-on: ubuntu-latest + timeout-minutes: 30 + steps: + - name: Checkout repo @ SHA - ${{ github.sha }} + uses: actions/checkout@v4 + + - name: Set up Python 3.13 + uses: actions/setup-python@v5 + with: + python-version: "3.13" + + - name: Install tox + run: pip install tox-uv + + - name: Run tests + run: tox -c tox-loongsuite.ini -e py313-test-loongsuite-instrumentation-langchain-latest -- -ra + py310-test-loongsuite-instrumentation-mem0-oldest_ubuntu-latest: name: LoongSuite loongsuite-instrumentation-mem0-oldest 3.10 Ubuntu runs-on: ubuntu-latest diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-langchain/CHANGELOG.md b/instrumentation-loongsuite/loongsuite-instrumentation-langchain/CHANGELOG.md index 6fbb9137d..fa2cff59f 100644 --- a/instrumentation-loongsuite/loongsuite-instrumentation-langchain/CHANGELOG.md +++ b/instrumentation-loongsuite/loongsuite-instrumentation-langchain/CHANGELOG.md @@ -7,6 +7,38 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## Unreleased +### Added + +- ReAct Step instrumentation for AgentExecutor + ([#139](https://github.com/alibaba/loongsuite-python-agent/pull/139)) + - Monkey-patch `AgentExecutor._iter_next_step` and `_aiter_next_step` to instrument each ReAct iteration + - Dual patch: patch both `langchain.agents` (0.x) and `langchain_classic.agents` (1.x) when available, so either import path works + - Covers invoke, ainvoke, stream, astream, batch, abatch + - ReAct Step spans: `gen_ai.span.kind=STEP`, `gen_ai.operation.name=react`, `gen_ai.react.round`, 
`gen_ai.react.finish_reason` + - Span hierarchy: Agent > ReAct Step > LLM/Tool + +- LangGraph ReAct agent support (requires `loongsuite-instrumentation-langgraph`) + ([#139](https://github.com/alibaba/loongsuite-python-agent/pull/139)) + - Detect LangGraph agents via `Run.metadata["_loongsuite_react_agent"]` + (metadata injected by the LangGraph instrumentation) + - Disambiguate the top-level graph (Agent span) from child nodes (chain + spans) using `inside_langgraph_react` propagation + - Agent name resolution: when the ReAct agent is invoked inside an outer + graph node, inherit the node's name (e.g. `invoke_agent product_agent`) + instead of the generic default (`invoke_agent LangGraph`) + - Track ReAct step boundaries via callback-based detection of the + `"agent"` node within the graph + - Span hierarchy: Agent > ReAct Step > LLM/Tool (same as AgentExecutor) + +### Breaking Changes + +- Rewrite the instrumentation for LangChain with `genai-util` + ([#139](https://github.com/alibaba/loongsuite-python-agent/pull/139)) + - Replaced the legacy `wrapt`-based function wrapping with `BaseTracer` callback mechanism + - Migrated to `ExtendedTelemetryHandler` from `opentelemetry-util-genai` for standardized GenAI semantic conventions + - Added Agent detection by `run.name`, TTFT tracking, content capture gating, and `RLock` thread safety + - Added new test suite with `oldest`/`latest` dependency matrices + ## Version 0.1.0 (2026-02-28) ### Added diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-langchain/README.md b/instrumentation-loongsuite/loongsuite-instrumentation-langchain/README.md index 0aa044d5e..8865f843a 100644 --- a/instrumentation-loongsuite/loongsuite-instrumentation-langchain/README.md +++ b/instrumentation-loongsuite/loongsuite-instrumentation-langchain/README.md @@ -4,23 +4,35 @@ This package provides OpenTelemetry instrumentation for LangChain applications, ## Installation +### Install instrumentation with source code + ```bash git 
clone https://github.com/alibaba/loongsuite-python-agent.git cd loongsuite-python-agent -pip install ./instrumentation-loongsuite/loongsuite-instrumentation-langchain +pip install -e ./util/opentelemetry-util-genai +pip install -e ./instrumentation-loongsuite/loongsuite-instrumentation-langchain +pip install -e ./loongsuite-distro ``` ## RUN ### Build the Example -Follow the official [LangChain Documentation](https://python.langchain.com/docs/introduction/) to create a sample file named `demo.py`. You can also experience the Tongyi model like me: https://python.langchain.com/docs/integrations/llms/tongyi/ +Follow the official [LangChain Documentation](https://python.langchain.com/docs/introduction/) to create a sample file named `demo.py`. ```python from langchain_core.messages import HumanMessage, SystemMessage -from langchain_community.llms.tongyi import Tongyi +from langchain_openai import ChatOpenAI +import os + -chatLLM = Tongyi(model="qwen-turbo") +chatLLM = ChatOpenAI( + model="qwen-plus", + base_url="https://dashscope.aliyuncs.com/compatible-mode/v1", + api_key=os.environ.get("DASHSCOPE_API_KEY", ""), + temperature=0, + stream_usage=True, +) messages = [ SystemMessage( content="You are a helpful assistant that translates English to French." 
@@ -35,48 +47,48 @@ print(res) ## Quick Start -You can automatically instrument your LangChain application using the `opentelemetry-instrument` command: +You can automatically instrument your LangChain application using the `loongsuite-instrument` command: ```bash -export OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT=true -opentelemetry-instrument \ +export OTEL_SEMCONV_STABILITY_OPT_IN=gen_ai_latest_experimental +export OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT=SPAN_ONLY +loongsuite-instrument \ --traces_exporter console \ --metrics_exporter console \ + --logs_exporter none \ python your_langchain_app.py ``` If everything is working correctly, you should see logs similar to the following ```json { - "name": "Tongyi", + "name": "chat qwen-plus", "context": { - "trace_id": "0x61d2c954558c3988f42770a946ea877e", - "span_id": "0x7bb229d6f75e52ad", + "trace_id": "0x153d9f32aeaef815a7ddc9ec406ef8fc", + "span_id": "0xc0c4107603054139", "trace_state": "[]" }, - "kind": "SpanKind.INTERNAL", + "kind": "SpanKind.CLIENT", "parent_id": null, - "start_time": "2025-08-14T07:30:38.783413Z", - "end_time": "2025-08-14T07:30:39.321573Z", + "start_time": "2026-03-10T06:04:56.411044Z", + "end_time": "2026-03-10T06:04:57.205725Z", "status": { - "status_code": "OK" + "status_code": "UNSET" }, "attributes": { - "gen_ai.span.kind": "llm", - "input.value": "{\"prompts\": [\"System: You are a helpful assistant that translates English to French.\\nHuman: Translate this sentence from English to French. 
I love programming.\"]}", - "input.mime_type": "application/json", - "output.value": "{\"generations\": [[{\"text\": \"J'adore la programmation.\", \"generation_info\": {\"finish_reason\": \"stop\", \"request_id\": \"463d2249-6424-9eef-8665-6ef88d4fcc7a\", \"token_usage\": {\"input_tokens\": 39, \"output_tokens\": 8, \"total_tokens\": 47, \"prompt_tokens_details\": {\"cached_tokens\": 0}}}, \"type\": \"Generation\"}]], \"llm_output\": {\"model_name\": \"qwen-turbo\"}, \"run\": null, \"type\": \"LLMResult\"}", - "output.mime_type": "application/json", - "gen_ai.prompt.0.content": "System: You are a helpful assistant that translates English to French.\nHuman: Translate this sentence from English to French. I love programming.", - "gen_ai.response.finish_reasons": "stop", - "gen_ai.usage.prompt_tokens": 39, - "gen_ai.usage.completion_tokens": 8, - "gen_ai.usage.total_tokens": 47, - "gen_ai.completion": [ - "J'adore la programmation." + "gen_ai.operation.name": "chat", + "gen_ai.span.kind": "LLM", + "gen_ai.request.model": "qwen-plus", + "gen_ai.provider.name": "openai", + "gen_ai.request.temperature": 0.0, + "gen_ai.response.finish_reasons": [ + "stop" ], - "gen_ai.response.model": "qwen-turbo", - "gen_ai.request.model": "qwen-turbo", - "metadata": "{\"ls_provider\": \"tongyi\", \"ls_model_type\": \"llm\", \"ls_model_name\": \"qwen-turbo\"}" + "gen_ai.response.model": "qwen-plus", + "gen_ai.usage.input_tokens": 36, + "gen_ai.usage.output_tokens": 8, + "gen_ai.usage.total_tokens": 44, + "gen_ai.input.messages": "[{\"role\":\"system\",\"parts\":[{\"content\":\"You are a helpful assistant that translates English to French.\",\"type\":\"text\"}]},{\"role\":\"user\",\"parts\":[{\"content\":\"Translate this sentence from English to French. 
I love programming.\",\"type\":\"text\"}]}]", + "gen_ai.output.messages": "[{\"role\":\"assistant\",\"parts\":[{\"content\":\"J\u2019adore la programmation.\",\"type\":\"text\"}],\"finish_reason\":\"stop\"}]" }, "events": [], "links": [], @@ -84,9 +96,9 @@ If everything is working correctly, you should see logs similar to th "attributes": { "telemetry.sdk.language": "python", "telemetry.sdk.name": "opentelemetry", - "telemetry.sdk.version": "1.35.0", - "service.name": "langchain_loon", - "telemetry.auto.version": "0.56b0" + "telemetry.sdk.version": "1.40.0", + "telemetry.auto.version": "0.61b0", + "service.name": "unknown_service" }, "schema_url": "" } @@ -101,16 +113,34 @@ export OTEL_EXPORTER_OTLP_PROTOCOL=http/protobuf export OTEL_EXPORTER_OTLP_TRACES_ENDPOINT= export OTEL_EXPORTER_OTLP_METRICS_ENDPOINT= -export OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT=true +export OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT=SPAN_ONLY -opentelemetry-instrument +loongsuite-instrument ``` +## Traced Operations + +| Operation | Span Kind | Attributes | +|-----------|-----------|------------| +| Chain | `CHAIN` | `gen_ai.operation.name=chain`, `gen_ai.span.kind=CHAIN`, `input.value`, `output.value` (when content capture enabled). Span name: `chain {run.name}` (e.g. RetrievalQA, StuffDocumentsChain, LLMChain) | +| LLM / Chat | `LLM` | `gen_ai.operation.name=chat`, `gen_ai.request.model`, token usage | +| Agent | `AGENT` | `gen_ai.operation.name=invoke_agent` | +| ReAct Step | `STEP` | `gen_ai.operation.name=react`, `gen_ai.react.round`, `gen_ai.react.finish_reason` | +| Tool | `TOOL` | `gen_ai.operation.name=execute_tool` | +| Retriever | `RETRIEVER` | `gen_ai.operation.name=retrieve_documents` | + +ReAct Step spans are created for each Reasoning-Acting iteration, with the hierarchy: Agent > ReAct Step > LLM/Tool. 
Supported agent types: + +- **AgentExecutor** (LangChain 0.x / 1.x) — detected by `run.name` +- **LangGraph `create_react_agent`** — detected by `Run.metadata` (requires + `loongsuite-instrumentation-langgraph`). When invoked inside an outer graph + node, the agent span inherits the node's name for better readability. + ## Requirements -- Python >= 3.8 +- Python >= 3.9 - LangChain >= 0.1.0 - OpenTelemetry >= 1.20.0 diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-langchain/pyproject.toml b/instrumentation-loongsuite/loongsuite-instrumentation-langchain/pyproject.toml index 2853b613f..a94902571 100644 --- a/instrumentation-loongsuite/loongsuite-instrumentation-langchain/pyproject.toml +++ b/instrumentation-loongsuite/loongsuite-instrumentation-langchain/pyproject.toml @@ -5,58 +5,48 @@ build-backend = "hatchling.build" [project] name = "loongsuite-instrumentation-langchain" dynamic = ["version"] -description = "OpenTelemetry LangChain Instrumentation" +description = "LoongSuite LangChain Instrumentation" readme = "README.md" license = "Apache-2.0" requires-python = ">=3.9" authors = [ - { name = "LoongSuite Python Agent Authors", email = "" }, + { name = "LoongSuite Python Agent Authors", email = "" }, ] classifiers = [ - "Development Status :: 5 - Production/Stable", - "Intended Audience :: Developers", - "License :: OSI Approved :: Apache Software License", - "Programming Language :: Python", - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "Programming Language :: Python :: 3.12", - "Programming Language :: Python :: 3.13", + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + 
"Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", ] dependencies = [ - "opentelemetry-api ~= 1.37", - "opentelemetry-instrumentation >= 0.58b0", - "opentelemetry-semantic-conventions >= 0.58b0", + "opentelemetry-api ~= 1.37", + "opentelemetry-instrumentation >= 0.58b0", + "opentelemetry-semantic-conventions >= 0.58b0", + "opentelemetry-util-genai", + "wrapt", ] [project.optional-dependencies] instruments = [ - "langchain_core >= 0.1.0", -] -test = [ - "pytest", - "opentelemetry-sdk", - "langchain_core == 0.1.8", - "langchain == 0.1.0", - "langchain_openai == 0.0.2", - "langchain-community == 0.0.10", - "respx", - "numpy", - "openai", - "httpx", -] -type-check = [ - "langchain_core == 0.1.0", + "langchain_core >= 0.1.0", ] +[project.urls] +Homepage = "https://github.com/alibaba/loongsuite-python-agent" + [tool.hatch.version] path = "src/opentelemetry/instrumentation/langchain/version.py" [tool.hatch.build.targets.sdist] include = [ - "src", - "tests", + "src", + "tests", ] [tool.hatch.build.targets.wheel] diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-langchain/src/opentelemetry/instrumentation/langchain/__init__.py b/instrumentation-loongsuite/loongsuite-instrumentation-langchain/src/opentelemetry/instrumentation/langchain/__init__.py index 60bcc249b..a8b258e5a 100644 --- a/instrumentation-loongsuite/loongsuite-instrumentation-langchain/src/opentelemetry/instrumentation/langchain/__init__.py +++ b/instrumentation-loongsuite/loongsuite-instrumentation-langchain/src/opentelemetry/instrumentation/langchain/__init__.py @@ -12,16 +12,36 @@ # See the License for the specific language governing permissions and # limitations under the License. +""" +LoongSuite LangChain instrumentation supporting ``langchain_core >= 0.1.0``. + +Usage +----- +.. 
code:: python + + from opentelemetry.instrumentation.langchain import LangChainInstrumentor + + LangChainInstrumentor().instrument() + + # ... use LangChain as normal ... + + LangChainInstrumentor().uninstrument() + +API +--- +""" + +from __future__ import annotations + +import importlib +import logging from typing import TYPE_CHECKING, Any, Callable, Collection, Type from wrapt import wrap_function_wrapper -from opentelemetry import trace as trace_api -from opentelemetry.instrumentation.instrumentor import ( - BaseInstrumentor, # type: ignore -) +from opentelemetry.instrumentation.instrumentor import BaseInstrumentor from opentelemetry.instrumentation.langchain.package import _instruments -from opentelemetry.metrics import Meter, get_meter +from opentelemetry.instrumentation.utils import unwrap if TYPE_CHECKING: from langchain_core.callbacks import BaseCallbackManager @@ -30,39 +50,212 @@ LoongsuiteTracer, ) +logger = logging.getLogger(__name__) + +__all__ = ["LangChainInstrumentor"] + +# Stored originals for AgentExecutor uninstrument: {class: (iter, aiter)} +_patched_agent_executors: dict[type, tuple[Any, Any]] = {} + +# Module paths that were successfully patched for create_agent (for uninstrument). +_patched_create_agent_locations: list[tuple[str, str]] = [] + + +def _get_agent_executor_classes() -> list[type]: + """Return all available AgentExecutor classes to patch. + + Both 0.x and 1.x may coexist: + - langchain 0.x: AgentExecutor in langchain.agents + - langchain 1.x + langchain-classic: AgentExecutor in langchain_classic.agents + + We patch both when both exist so either import path works. 
+ """ + classes: list[type] = [] + try: + from langchain.agents import AgentExecutor # noqa: PLC0415 + + classes.append(AgentExecutor) + except ImportError as e: + logger.debug("langchain.agents.AgentExecutor not available: %s", e) + try: + from langchain_classic.agents import AgentExecutor # noqa: PLC0415 + + if AgentExecutor not in classes: + classes.append(AgentExecutor) + except ImportError as e: + logger.debug( + "langchain_classic.agents.AgentExecutor not available: %s", e + ) + return classes + + +def _instrument_agent_executor() -> bool: + """Apply ReAct step patch to AgentExecutor. Returns True if patched.""" + classes = _get_agent_executor_classes() + if not classes: + logger.debug("AgentExecutor not available, skipping ReAct patch") + return False + + from opentelemetry.instrumentation.langchain.internal.patch import ( # noqa: PLC0415 + _make_aiter_next_step_wrapper, + _make_iter_next_step_wrapper, + ) + + for cls in classes: + orig_iter = cls._iter_next_step + orig_aiter = cls._aiter_next_step + cls._iter_next_step = _make_iter_next_step_wrapper(orig_iter) + cls._aiter_next_step = _make_aiter_next_step_wrapper(orig_aiter) + _patched_agent_executors[cls] = (orig_iter, orig_aiter) + + logger.debug( + "Patched AgentExecutor._iter_next_step and _aiter_next_step (%d class(es))", + len(classes), + ) + return True + + +def _uninstrument_agent_executor() -> None: + """Restore original AgentExecutor methods.""" + if not _patched_agent_executors: + return + for cls, (orig_iter, orig_aiter) in list(_patched_agent_executors.items()): + try: + cls._iter_next_step = orig_iter + cls._aiter_next_step = orig_aiter + except Exception: # noqa: S110 + logger.debug( + "Failed to restore AgentExecutor methods for %s", + cls, + exc_info=True, + ) + logger.debug( + "Restored AgentExecutor._iter_next_step and _aiter_next_step (%d class(es))", + len(_patched_agent_executors), + ) + _patched_agent_executors.clear() + + +# 
------------------------------------------------------------------ +# create_agent patch (langchain >= 1.x) +# ------------------------------------------------------------------ + +_REACT_AGENT_GRAPH_ATTR = "_loongsuite_react_agent" + + +def _create_agent_wrapper( + wrapped: Any, _instance: Any, args: Any, kwargs: Any +) -> Any: + """``wrapt`` wrapper for ``create_agent``. + + Calls the original factory, then marks the returned graph with + ``_loongsuite_react_agent = True`` so that the langgraph Pregel + stream/astream wrapper can inject metadata into ``RunnableConfig``. + """ + graph = wrapped(*args, **kwargs) + setattr(graph, _REACT_AGENT_GRAPH_ATTR, True) + return graph + -class LangChainInstrumentor(BaseInstrumentor): # type: ignore +def _get_create_agent_locations() -> list[tuple[str, str]]: + """Return (module, attribute) pairs for ``create_agent``. + + langchain >= 1.x exposes ``create_agent`` in ``langchain.agents``. """ - An instrumentor for LangChain + locations: list[tuple[str, str]] = [] + try: + from langchain.agents import create_agent # noqa: PLC0415, F401 + + locations.append(("langchain.agents", "create_agent")) + except ImportError as exc: + logger.debug("langchain.agents.create_agent not available: %s", exc) + return locations + + +def _instrument_create_agent() -> None: + """Wrap ``create_agent`` so that the returned graph is marked as a + ReAct agent, enabling downstream metadata injection by langgraph + instrumentation. """ + locations = _get_create_agent_locations() + if not locations: + logger.debug( + "create_agent not found in langchain.agents; " + "create_agent instrumentation skipped." 
+ ) + return + + for module_path, attr_name in locations: + wrap_function_wrapper(module_path, attr_name, _create_agent_wrapper) + logger.debug("Patched %s.%s", module_path, attr_name) + + _patched_create_agent_locations.clear() + _patched_create_agent_locations.extend(locations) + + +def _uninstrument_create_agent() -> None: + """Restore original ``create_agent`` functions.""" + for module_path, attr_name in _patched_create_agent_locations: + try: + mod = importlib.import_module(module_path) + unwrap(mod, attr_name) + logger.debug("Restored %s.%s", module_path, attr_name) + except Exception as exc: # noqa: BLE001 + logger.debug( + "Failed to restore %s.%s: %s", module_path, attr_name, exc + ) + + _patched_create_agent_locations.clear() + + +class LangChainInstrumentor(BaseInstrumentor): + """An instrumentor for LangChain.""" def instrumentation_dependencies(self) -> Collection[str]: return _instruments def _instrument(self, **kwargs: Any) -> None: - if not (tracer_provider := kwargs.get("tracer_provider")): - tracer_provider = trace_api.get_tracer_provider() - tracer = trace_api.get_tracer(__name__, "", tracer_provider) from opentelemetry.instrumentation.langchain.internal._tracer import ( # noqa: PLC0415 LoongsuiteTracer, ) + from opentelemetry.util.genai.extended_handler import ( # noqa: PLC0415 + ExtendedTelemetryHandler, + ) + tracer_provider = kwargs.get("tracer_provider") meter_provider = kwargs.get("meter_provider") - meter = get_meter( - __name__, + logger_provider = kwargs.get("logger_provider") + + handler = ExtendedTelemetryHandler( + tracer_provider=tracer_provider, meter_provider=meter_provider, - schema_url="https://opentelemetry.io/schemas/1.11.0", + logger_provider=logger_provider, ) + wrap_function_wrapper( module="langchain_core.callbacks", name="BaseCallbackManager.__init__", wrapper=_BaseCallbackManagerInit( - tracer=tracer, meter=meter, cls=LoongsuiteTracer + cls=LoongsuiteTracer, + handler=handler, + tracer_provider=tracer_provider, ), ) + 
_instrument_agent_executor() + _instrument_create_agent() + def _uninstrument(self, **kwargs: Any) -> None: - pass + try: + import langchain_core.callbacks # noqa: PLC0415 + + unwrap(langchain_core.callbacks.BaseCallbackManager, "__init__") + logger.debug("Uninstrumented BaseCallbackManager.__init__") + except Exception as e: + logger.warning("Failed to uninstrument BaseCallbackManager: %s", e) + + _uninstrument_agent_executor() + _uninstrument_create_agent() class _BaseCallbackManagerInit: @@ -70,11 +263,14 @@ class _BaseCallbackManagerInit: def __init__( self, - tracer: trace_api.Tracer, - meter: Meter, cls: Type["LoongsuiteTracer"], + handler: Any, + tracer_provider: Any = None, ): - self._tracer_instance = cls(tracer=tracer, meter=meter) + self._tracer_instance = cls( + handler=handler, + tracer_provider=tracer_provider, + ) def __call__( self, @@ -85,8 +281,8 @@ def __call__( ) -> None: wrapped(*args, **kwargs) - for handler in instance.inheritable_handlers: - if isinstance(handler, type(self._tracer_instance)): + for h in instance.inheritable_handlers: + if isinstance(h, type(self._tracer_instance)): break else: instance.add_handler(self._tracer_instance, True) diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-langchain/src/opentelemetry/instrumentation/langchain/internal/_tracer.py b/instrumentation-loongsuite/loongsuite-instrumentation-langchain/src/opentelemetry/instrumentation/langchain/internal/_tracer.py index e45c30e9b..f9545623c 100644 --- a/instrumentation-loongsuite/loongsuite-instrumentation-langchain/src/opentelemetry/instrumentation/langchain/internal/_tracer.py +++ b/instrumentation-loongsuite/loongsuite-instrumentation-langchain/src/opentelemetry/instrumentation/langchain/internal/_tracer.py @@ -12,900 +12,805 @@ # See the License for the specific language governing permissions and # limitations under the License. -import json +""" +LoongSuite LangChain Tracer — data extraction phase. 
+ +Extends ``langchain_core.tracers.base.BaseTracer`` and overrides the +fine-grained ``_on_*`` hooks to extract telemetry data from LangChain +``Run`` objects and emit OpenTelemetry spans via ``util-genai``. + +Context propagation follows the Robin/OpenLLMetry pattern: parent-child +span relationships are established by passing ``context`` explicitly to +``start_span`` / ``handler.start_*``, rather than using hazardous +``context_api.attach`` / ``detach`` in a callback system. + +The only exception is Chain spans: they use ``attach``/``detach`` so that +non-LangChain child operations (e.g. HTTP calls) nest correctly. + +Run type → handler mapping +-------------------------- +* **LLM / chat_model** → ``handler.start_llm`` / ``stop_llm`` / ``fail_llm`` +* **Chain (Agent)** → ``handler.start_invoke_agent`` / … +* **Chain (generic)** → direct span creation (no ``util-genai``) +* **Tool** → ``handler.start_execute_tool`` / … +* **Retriever** → ``handler.start_retrieve`` / … +""" + +from __future__ import annotations + import logging -import math -import os -import time -import traceback -from copy import deepcopy -from datetime import datetime, timezone -from enum import Enum -from itertools import chain +import timeit +from dataclasses import dataclass from threading import RLock -from typing import ( - Any, - Callable, - Dict, - Iterable, - Iterator, - List, - Mapping, - NamedTuple, - Optional, - Sequence, - Tuple, -) +from typing import Any, Literal, Optional from uuid import UUID -from langchain_core.messages import BaseMessage from langchain_core.tracers.base import BaseTracer from langchain_core.tracers.schemas import Run -from opentelemetry import context as context_api -from opentelemetry import trace as trace_api -from opentelemetry.context import _SUPPRESS_INSTRUMENTATION_KEY -from opentelemetry.metrics import Meter -from opentelemetry.semconv.trace import SpanAttributes as OTELSpanAttributes -from opentelemetry.util.types import AttributeValue - -from 
._utils import _filter_base64_images, process_content -from .semconv import ( - CONTENT, - DOCUMENT_CONTENT, - DOCUMENT_METADATA, - INPUT_MIME_TYPE, +from opentelemetry import context as otel_context +from opentelemetry.context import Context +from opentelemetry.instrumentation.langchain.internal._utils import ( + LANGGRAPH_REACT_STEP_NODE, + _extract_finish_reasons, + _extract_invocation_params, + _extract_llm_input_messages, + _extract_llm_output_messages, + _extract_model_name, + _extract_provider, + _extract_response_model, + _extract_token_usage, + _extract_tool_definitions, + _has_langgraph_react_metadata, + _is_agent_run, + _safe_json, +) +from opentelemetry.instrumentation.langchain.internal.semconv import ( + GEN_AI_OPERATION_NAME, + GEN_AI_SPAN_KIND, INPUT_VALUE, - LLM_INPUT_MESSAGES, - LLM_MODEL_NAME, - LLM_OUTPUT_MESSAGES, - LLM_PROMPT_TEMPLATE, - LLM_PROMPT_TEMPLATE_VARIABLES, - LLM_RESPONSE_FINISH_REASON, - LLM_RESPONSE_MODEL_NAME, - LLM_SESSION_ID, - LLM_SPAN_KIND, - LLM_USAGE_COMPLETION_TOKENS, - LLM_USAGE_PROMPT_TOKENS, - LLM_USAGE_TOTAL_TOKENS, - LLM_USER_ID, - MESSAGE_CONTENT, - MESSAGE_FUNCTION_CALL_ARGUMENTS_JSON, - MESSAGE_FUNCTION_CALL_NAME, - MESSAGE_NAME, - MESSAGE_ROLE, - MESSAGE_TOOL_CALLS, - METADATA, - OUTPUT_MIME_TYPE, OUTPUT_VALUE, - RETRIEVAL_DOCUMENTS, - TOOL_CALL_FUNCTION_ARGUMENTS_JSON, - TOOL_CALL_FUNCTION_NAME, - TOOL_DESCRIPTION, - TOOL_NAME, - MimeTypeValues, - SpanKindValues, +) +from opentelemetry.instrumentation.langchain.version import __version__ +from opentelemetry.trace import ( + Span, + SpanKind, + StatusCode, + get_tracer, + set_span_in_context, +) +from opentelemetry.util.genai._extended_common import ReactStepInvocation +from opentelemetry.util.genai.extended_handler import ExtendedTelemetryHandler +from opentelemetry.util.genai.extended_types import ( + ExecuteToolInvocation, + InvokeAgentInvocation, + RetrieveInvocation, +) +from opentelemetry.util.genai.handler import _safe_detach +from 
opentelemetry.util.genai.types import ( + Error, + InputMessage, + LLMInvocation, + OutputMessage, + Text, +) +from opentelemetry.util.genai.utils import ( + ContentCapturingMode, + get_content_capturing_mode, + is_experimental_mode, ) logger = logging.getLogger(__name__) -ENABLE_LANGCHAIN_INSTRUMENTOR = "ENABLE_LANGCHAIN_INSTRUMENTOR" - - -def _is_enable(): - enable_instrumentor = os.getenv(ENABLE_LANGCHAIN_INSTRUMENTOR) - if enable_instrumentor is None: - return True - if enable_instrumentor.lower() == "false": +# --------------------------------------------------------------------------- +# _RunData — per-run bookkeeping +# --------------------------------------------------------------------------- +RunKind = Literal["llm", "agent", "chain", "tool", "retriever", "react_step"] + + +@dataclass +class _RunData: + run_kind: RunKind + span: Span | None = None + context: Context | None = None + invocation: Any = None + context_token: object | None = None # only used for Chain attach/detach + # Agent run only: ReAct Step state + react_round: int = 0 + active_step: "_RunData | None" = None + original_context: Context | None = None + is_langgraph_react: bool = False + inside_langgraph_react: bool = False + + +def _should_capture_chain_content() -> bool: + """Check if chain input/output content should be recorded.""" + try: + if not is_experimental_mode(): + return False + return get_content_capturing_mode() in ( + ContentCapturingMode.SPAN_ONLY, + ContentCapturingMode.SPAN_AND_EVENT, + ) + except ValueError: + logger.debug( + "Content capturing mode check failed (experimental mode or mode value)", + exc_info=True, + ) return False - else: - return True -_AUDIT_TIMING = False +# --------------------------------------------------------------------------- +# LoongsuiteTracer +# --------------------------------------------------------------------------- -class _Run(NamedTuple): - span: trace_api.Span - context: context_api.Context +class LoongsuiteTracer(BaseTracer): + 
"""LangChain tracer that emits OpenTelemetry spans via util-genai. + Context propagation is done explicitly — parent-child relationships + are established by passing the stored ``Context`` of the parent run + to ``handler.start_*(…, context=parent_ctx)`` or to + ``tracer.start_span(…, context=parent_ctx)``. -class LoongsuiteTracer(BaseTracer): - __slots__ = ("_tracer", "_runs", "_lock", "_meter") + Chain spans are the sole exception: they ``attach``/``detach`` the + context so that non-LangChain child operations nest correctly. + + All access to ``self._runs`` is protected by an ``RLock`` because + LangChain callbacks may be fired from different threads. + """ def __init__( - self, tracer: trace_api.Tracer, meter: Meter, *args: Any, **kwargs: Any + self, + handler: ExtendedTelemetryHandler, + tracer_provider: Any = None, + **kwargs: Any, ) -> None: - super().__init__(*args, **kwargs) - self._tracer = tracer - self._runs: Dict[UUID, _Run] = {} - self._lock = RLock() # handlers may be run in a thread by langchain - self._meter = meter - - def _start_trace(self, run: Run) -> None: - super()._start_trace(run) - if not _is_enable(): - return - if context_api.get_value(_SUPPRESS_INSTRUMENTATION_KEY): - return - with self._lock: - parent_context = ( - parent.context - if (parent_run_id := run.parent_run_id) - and (parent := self._runs.get(parent_run_id)) - else None - ) - # We can't use real time because the handler may be - # called in a background thread. 
- start_time_utc_nano = _as_utc_nano(run.start_time) - span = self._tracer.start_span( - name=run.name, - context=parent_context, - start_time=start_time_utc_nano, + super().__init__(_schema_format="original+chat", **kwargs) + self._handler = handler + self._tracer = get_tracer( + __name__, + __version__, + tracer_provider=tracer_provider, ) - context = trace_api.set_span_in_context(span) - # The following line of code is commented out to serve as a reminder that in a system - # of callbacks, attaching the context can be hazardous because there is no guarantee - # that the context will be detached. An error could happen between callbacks leaving - # the context attached forever, and all future spans will use it as parent. What's - # worse is that the error could have also prevented the span from being exported, - # leaving all future spans as orphans. That is a very bad scenario. - # token = context_api.attach(context) - with self._lock: - self._runs[run.id] = _Run(span=span, context=context) - - def _end_trace(self, run: Run) -> None: - super()._end_trace(run) - if not _is_enable(): - return - if context_api.get_value(_SUPPRESS_INSTRUMENTATION_KEY): - return - with self._lock: - if run.id not in self._runs: - logger.warning(f"Run Id: {run.id} is not in event data.") - return - event_data = self._runs.pop(run.id, None) - if event_data: - span = event_data.span - try: - _update_span(span, run) - except Exception: - logger.exception("Failed to update span with run data.") - # We can't use real time because the handler may be - # called in a background thread. - end_time_utc_nano = ( - _as_utc_nano(run.end_time) if run.end_time else None - ) - span.end(end_time=end_time_utc_nano) + self._runs: dict[UUID, _RunData] = {} + self._lock = RLock() + # Don't use super().run_map because it will lead to unexpected behavior when multiple tracers are used. 
+ self.run_map = dict(self.run_map) + self.run_map_lock = RLock() def _persist_run(self, run: Run) -> None: pass - def on_llm_error( - self, error: BaseException, *args: Any, run_id: UUID, **kwargs: Any - ) -> Run: - logger.debug(f"on_llm_error: {error}") - with self._lock: - event_data = self._runs.get(run_id) - if event_data: - _record_exception(event_data.span, error) - return super().on_llm_error(error, *args, run_id=run_id, **kwargs) - - def on_chain_error( - self, error: BaseException, *args: Any, run_id: UUID, **kwargs: Any - ) -> Run: - logger.debug(f"on_chain_error: {error}") - with self._lock: - event_data = self._runs.get(run_id) - if event_data: - _record_exception(event_data.span, error) - return super().on_chain_error(error, *args, run_id=run_id, **kwargs) - - def on_retriever_error( - self, error: BaseException, *args: Any, run_id: UUID, **kwargs: Any - ) -> Run: - logger.debug(f"on_retriever_error: {error}") - with self._lock: - event_data = self._runs.get(run_id) - if event_data: - _record_exception(event_data.span, error) - return super().on_retriever_error( - error, *args, run_id=run_id, **kwargs - ) + # ------------------------------------------------------------------ + # Context helper + # ------------------------------------------------------------------ + + def _get_parent_context(self, run: Run) -> Context | None: + """Return the stored context of the parent run, or *None*.""" + parent_id = getattr(run, "parent_run_id", None) + if parent_id: + with self._lock: + rd = self._runs.get(parent_id) + if rd is not None: + return rd.context + return None + + # ------------------------------------------------------------------ + # _start_trace / _end_trace + # ------------------------------------------------------------------ + # We maintain only run_map (required for _complete_* / _errored_* to find + # the run). We do NOT call super() to avoid parent's order_map accumulation + # and unexpected behavior when multiple tracers are used. 
- def on_tool_error( - self, error: BaseException, *args: Any, run_id: UUID, **kwargs: Any - ) -> Run: - logger.debug(f"on_tool_error: {error}") + def _start_trace(self, run: Run) -> None: + with self.run_map_lock: + self.run_map[str(run.id)] = run + + def _end_trace(self, run: Run) -> None: + with self.run_map_lock: + self.run_map.pop(str(run.id), None) + + # ------------------------------------------------------------------ + # TTFT (Time To First Token) — streaming support + # ------------------------------------------------------------------ + + def on_llm_new_token( # type: ignore[override] + self, + token: str, + *, + chunk: Optional[Any] = None, + run_id: UUID, + parent_run_id: Optional[UUID] = None, + **kwargs: Any, + ) -> Run | None: + """Record the first-token timestamp for TTFT calculation.""" with self._lock: - event_data = self._runs.get(run_id) - if event_data: - _record_exception(event_data.span, error) - return super().on_tool_error(error, *args, run_id=run_id, **kwargs) - - -def _record_exception(span: trace_api.Span, error: BaseException) -> None: - if isinstance(error, Exception): - span.record_exception(error) - return - exception_type = error.__class__.__name__ - exception_message = str(error) - if not exception_message: - exception_message = repr(error) - attributes: Dict[str, AttributeValue] = { - OTELSpanAttributes.EXCEPTION_TYPE: exception_type, - OTELSpanAttributes.EXCEPTION_MESSAGE: exception_message, - OTELSpanAttributes.EXCEPTION_ESCAPED: False, - } - try: - # See e.g. 
https://github.com/open-telemetry/opentelemetry-python/blob/e9c7c7529993cd13b4af661e2e3ddac3189a34d0/opentelemetry-sdk/src/opentelemetry/sdk/trace/__init__.py#L967 # noqa: E501 - attributes[OTELSpanAttributes.EXCEPTION_STACKTRACE] = ( - traceback.format_exc() - ) - except Exception: - logger.exception("Failed to record exception stacktrace.") - span.add_event(name="exception", attributes=attributes) - - -def _update_span(span: trace_api.Span, run: Run) -> None: - logger.debug(f"_update_span: {run}") - if run.error is None: - span.set_status(trace_api.StatusCode.OK) - else: - span.set_status( - trace_api.Status(trace_api.StatusCode.ERROR, run.error) - ) - span_kind = ( - SpanKindValues.AGENT - if "agent" in run.name.lower() - else _langchain_run_type_to_span_kind(run.run_type) - ) - span.set_attribute(LLM_SPAN_KIND, span_kind.value) - deepcopy_inputs = deepcopy(run.inputs) - filtered_inputs = _filter_base64_images(deepcopy_inputs) - span.set_attributes( - dict( - _flatten( - chain( - _as_input(_convert_io(filtered_inputs)), - _as_output(_convert_io(run.outputs)), - _prompts(filtered_inputs), - _input_messages(filtered_inputs), - _output_messages(run.outputs), - _prompt_template(run), - _model_name(run.extra), - _token_counts(run.outputs), - _tools(run), - _retrieval_documents(run), - _metadata(run), - ) - ) - ) - ) + rd = self._runs.get(run_id) + if ( + rd is not None + and rd.run_kind == "llm" + and rd.invocation is not None + ): + inv: LLMInvocation = rd.invocation + if inv.monotonic_first_token_s is None: + inv.monotonic_first_token_s = timeit.default_timer() + return None + # ------------------------------------------------------------------ + # LLM hooks + # ------------------------------------------------------------------ -def _langchain_run_type_to_span_kind(run_type: str) -> SpanKindValues: - try: - return SpanKindValues(run_type.lower()) - except ValueError: - return SpanKindValues.UNKNOWN + def _on_llm_start(self, run: Run) -> None: + 
self._handle_llm_start(run) + def _on_chat_model_start(self, run: Run) -> None: + self._handle_llm_start(run) -def _serialize_json(obj: Any) -> str: - if isinstance(obj, datetime): - return obj.isoformat() - return str(obj) + def _handle_llm_start(self, run: Run) -> None: + try: + parent_ctx = self._get_parent_context(run) + params = _extract_invocation_params(run) + invocation = LLMInvocation( + request_model=_extract_model_name(run) or run.name or "", + provider=_extract_provider(run), + input_messages=_extract_llm_input_messages(run), + temperature=params.get("temperature"), + top_p=params.get("top_p"), + max_tokens=params.get("max_tokens") + or params.get("max_output_tokens"), + ) + tool_defs = _extract_tool_definitions(run) + if tool_defs: + invocation.tool_definitions = tool_defs + self._handler.start_llm(invocation, context=parent_ctx) + rd = _RunData( + run_kind="llm", + span=invocation.span, + context=set_span_in_context(invocation.span) + if invocation.span + else None, + invocation=invocation, + ) + with self._lock: + self._runs[run.id] = rd + except Exception: + logger.debug("Failed to start LLM span", exc_info=True) + def _on_llm_end(self, run: Run) -> None: + with self._lock: + rd = self._runs.pop(run.id, None) + if rd is None or rd.run_kind != "llm": + return + try: + inv: LLMInvocation = rd.invocation + inv.output_messages = _extract_llm_output_messages(run) + inv.input_tokens, inv.output_tokens = _extract_token_usage(run) + inv.finish_reasons = _extract_finish_reasons(run) + inv.response_model_name = _extract_response_model(run) + self._handler.stop_llm(inv) + except Exception: + logger.debug("Failed to stop LLM span", exc_info=True) -def stop_on_exception( - wrapped: Callable[..., Iterator[Tuple[str, Any]]], -) -> Callable[..., Iterator[Tuple[str, Any]]]: - def wrapper(*args: Any, **kwargs: Any) -> Iterator[Tuple[str, Any]]: - start_time = time.perf_counter() + def _on_llm_error(self, run: Run) -> None: + with self._lock: + rd = 
self._runs.pop(run.id, None) + if rd is None or rd.run_kind != "llm": + return try: - yield from wrapped(*args, **kwargs) + err_str = getattr(run, "error", None) or "Unknown error" + self._handler.fail_llm( + rd.invocation, + Error(message=str(err_str), type=Exception), + ) except Exception: - logger.exception("Failed to get attribute.") - finally: - if _AUDIT_TIMING: - latency_ms = (time.perf_counter() - start_time) * 1000 - logger.debug(f"{wrapped.__name__}: {latency_ms:.3f}ms") - - return wrapper - - -@stop_on_exception -def _flatten( - key_values: Iterable[Tuple[str, Any]], -) -> Iterator[Tuple[str, AttributeValue]]: - for key, value in key_values: - if value is None: - continue - if isinstance(value, Mapping): - for sub_key, sub_value in _flatten(value.items()): - yield f"{key}.{sub_key}", sub_value - elif isinstance(value, List) and any( - isinstance(item, Mapping) for item in value - ): - for index, sub_mapping in enumerate(value): - for sub_key, sub_value in _flatten(sub_mapping.items()): - yield f"{key}.{index}.{sub_key}", sub_value - else: - if isinstance(value, Enum): - value = value.value - yield key, value + logger.debug("Failed to fail LLM span", exc_info=True) + # ------------------------------------------------------------------ + # Chain / Agent hooks + # ------------------------------------------------------------------ -@stop_on_exception -def _as_input(values: Iterable[str]) -> Iterator[Tuple[str, str]]: - return zip((INPUT_VALUE, INPUT_MIME_TYPE), values) + def _on_chain_start(self, run: Run) -> None: + try: + if _is_agent_run(run): + self._start_agent(run) + elif _has_langgraph_react_metadata(run): + self._handle_langgraph_chain_start(run) + else: + self._start_chain(run) + except Exception: + logger.debug("Failed to start Chain/Agent span", exc_info=True) + + def _handle_langgraph_chain_start(self, run: Run) -> None: + """Route a chain start that carries LangGraph ReAct metadata. 
+ + Because ``config["metadata"]`` propagates to child callbacks, + both the graph-level run and its child nodes carry the flag. + We disambiguate by checking whether any ancestor is a LangGraph + ReAct agent (``is_langgraph_react``) or inside one + (``inside_langgraph_react``): + + * **Inside LangGraph agent** → child node (chain span, with + possible ReAct step transition). + * **Otherwise** → top-level graph → create Agent span. + """ + parent_id = getattr(run, "parent_run_id", None) + with self._lock: + parent_rd = self._runs.get(parent_id) if parent_id else None + inside = parent_rd is not None and ( + parent_rd.is_langgraph_react or parent_rd.inside_langgraph_react + ) -@stop_on_exception -def _as_output(values: Iterable[str]) -> Iterator[Tuple[str, str]]: - return zip((OUTPUT_VALUE, OUTPUT_MIME_TYPE), values) + if inside: + self._maybe_enter_langgraph_react_step(run) + self._start_chain(run) + else: + self._start_agent(run) + def _resolve_langgraph_agent_name(self, run: Run) -> str: + """Pick a meaningful agent name for a LangGraph ReAct agent. -def _convert_io(obj: Optional[Mapping[str, Any]]) -> Iterator[str]: - if not obj: - return - assert isinstance(obj, dict), f"expected dict, found {type(obj)}" - if len(obj) == 1 and isinstance(value := next(iter(obj.values())), str): - yield process_content(value) - else: - obj = dict(_replace_nan(obj)) - content = process_content( - json.dumps(obj, default=_serialize_json, ensure_ascii=False) - ) - yield process_content(content) - yield MimeTypeValues.JSON.value + When the inner graph uses the default name ``"LangGraph"`` and is + invoked as a node inside an outer graph, the parent node's name + (e.g. ``"product_agent"``) is far more descriptive. We prefer it + over the generic default. 
+ """ + name = run.name or "" + if not _has_langgraph_react_metadata(run) or name != "LangGraph": + return name + parent_id = getattr(run, "parent_run_id", None) + if not parent_id: + return name -def _replace_nan(obj: Mapping[str, Any]) -> Iterator[Tuple[str, Any]]: - for k, v in obj.items(): - if isinstance(v, float) and not math.isfinite(v): - yield k, None - else: - yield k, v - - -@stop_on_exception -def _prompts( - inputs: Optional[Mapping[str, Any]], -) -> Iterator[Tuple[str, List[str]]]: - """Yields prompts if present.""" - if not inputs: - return - if not hasattr(inputs, "get"): - logger.warning( - f"Invalid input type for parameter 'inputs': expected mapping, found {type(inputs).__name__}" - ) - return - if prompts := inputs.get("prompts"): - idx = 0 - for prompt in prompts: - if isinstance(prompt, dict): - if "role" in prompt: - yield f"gen_ai.prompt.{idx}.role", f"{prompt['role']}" - if "text" in prompt: - yield ( - f"gen_ai.prompt.{idx}.content", - process_content(f"{prompt['text']}"), - ) - elif isinstance(prompt, str): - yield f"gen_ai.prompt.{idx}.content", process_content(prompt) - idx += 1 - - -@stop_on_exception -def _input_messages( - inputs: Optional[Mapping[str, Any]], -) -> Iterator[Tuple[str, List[Dict[str, Any]]]]: - """Yields chat messages if present.""" - if not inputs: - return - assert hasattr(inputs, "get"), f"expected Mapping, found {type(inputs)}" - # There may be more than one set of messages. We'll use just the first set. - if multiple_messages := inputs.get("messages"): - assert isinstance(multiple_messages, Iterable), ( - f"expected Iterable, found {type(multiple_messages)}" - ) - # This will only get the first set of messages. 
- if not (first_messages := next(iter(multiple_messages), None)): - return - parsed_messages = [] - if isinstance(first_messages, list): - for message_data in first_messages: - if isinstance(message_data, BaseMessage): - parsed_messages.append( - dict(_parse_message_data(message_data.to_json())) - ) - elif hasattr(message_data, "get"): - parsed_messages.append( - dict(_parse_message_data(message_data)) - ) - elif ( - isinstance(message_data, Sequence) - and len(message_data) == 2 - ): - # Handle tuple format (role, content) - role, content = message_data - parsed_messages.append( - {MESSAGE_ROLE: role, MESSAGE_CONTENT: content} - ) - else: - logger.warning( - f"Failed to parse message of type {type(message_data)}" - ) - elif isinstance(first_messages, BaseMessage): - parsed_messages.append( - dict(_parse_message_data(first_messages.to_json())) - ) - elif hasattr(first_messages, "get"): - parsed_messages.append(dict(_parse_message_data(first_messages))) - elif isinstance(first_messages, Sequence) and len(first_messages) == 2: - # Handle tuple format (role, content) - role, content = first_messages - parsed_messages.append( - {MESSAGE_ROLE: role, MESSAGE_CONTENT: content} - ) - else: - logger.warning( - f"Failed to parse messages of type {type(first_messages)}" + with self._lock: + parent_rd = self._runs.get(parent_id) + if parent_rd is None or parent_rd.run_kind != "chain": + return name + + span = parent_rd.span + if span is None: + return name + + parent_span_name: str = span.name + if parent_span_name.startswith("chain "): + return parent_span_name[len("chain ") :] + return name + + def _start_agent(self, run: Run) -> None: + parent_ctx = self._get_parent_context(run) + inputs = getattr(run, "inputs", None) or {} + input_messages: list[InputMessage] = [] + + # AgentExecutor format: {"input": "...", "query": "..."} + input_val = inputs.get("input") or inputs.get("query") or "" + if isinstance(input_val, str) and input_val: + input_messages.append( + 
InputMessage(role="user", parts=[Text(content=input_val)]) ) - if parsed_messages: - yield LLM_INPUT_MESSAGES, parsed_messages - elif multiple_prompts := inputs.get("prompts"): - assert isinstance(multiple_prompts, Iterable), ( - f"expected Iterable, found {type(multiple_prompts)}" + + # LangGraph format: {"messages": [...]} + if not input_messages: + raw_messages = inputs.get("messages") + if raw_messages and isinstance(raw_messages, list): + for msg in raw_messages: + converted = _extract_langgraph_input_message(msg) + if converted: + input_messages.append(converted) + + agent_name = self._resolve_langgraph_agent_name(run) + + invocation = InvokeAgentInvocation( + provider="langchain", + agent_name=agent_name, + input_messages=input_messages, ) - parsed_prompts = [] - for prompt_data in multiple_prompts: - assert isinstance(prompt_data, str), ( - f"expected str, found {type(prompt_data)}" - ) - parsed_prompts.append(dict(_parse_prompt_data(prompt_data))) - if parsed_prompts: - yield LLM_INPUT_MESSAGES, parsed_prompts - - -@stop_on_exception -def _output_messages( - outputs: Optional[Mapping[str, Any]], -) -> Iterator[Tuple[str, List[Dict[str, Any]]]]: - """Yields chat messages if present.""" - if not outputs: - return - assert hasattr(outputs, "get"), f"expected Mapping, found {type(outputs)}" - # There may be more than one set of generations. We'll use just the first set. - if not (multiple_generations := outputs.get("generations")): - return - assert isinstance(multiple_generations, Iterable), ( - f"expected Iterable, found {type(multiple_generations)}" - ) - # This will only get the first set of generations. 
- if not (first_generations := next(iter(multiple_generations), None)): - return - assert isinstance(first_generations, Iterable), ( - f"expected Iterable, found {type(first_generations)}" - ) - parsed_messages = [] - for generation in first_generations: - assert hasattr(generation, "get"), ( - f"expected Mapping, found {type(generation)}" + self._handler.start_invoke_agent(invocation, context=parent_ctx) + rd = _RunData( + run_kind="agent", + span=invocation.span, + context=set_span_in_context(invocation.span) + if invocation.span + else None, + invocation=invocation, + is_langgraph_react=_has_langgraph_react_metadata(run), ) - if message_data := generation.get("message"): - if isinstance(message_data, BaseMessage): - parsed_messages.append( - dict(_parse_message_data(message_data.to_json())) - ) - elif hasattr(message_data, "get"): - parsed_messages.append(dict(_parse_message_data(message_data))) - else: - logger.warning( - f"Failed to parse message of type {type(message_data)}" - ) - elif text := generation.get("text"): - parsed_messages.append(text) - if generation_info := generation.get("generation_info"): - assert hasattr(generation_info, "get"), ( - f"expected Mapping, found {type(generation_info)}" - ) - if finish_reason := generation_info.get("finish_reason"): - yield LLM_RESPONSE_FINISH_REASON, finish_reason - if token_usage := generation_info.get("token_usage"): - assert hasattr(token_usage, "get"), ( - f"expected Mapping, found {type(token_usage)}" - ) - for attribute_name, key in [ - (LLM_USAGE_PROMPT_TOKENS, "input_tokens"), - (LLM_USAGE_COMPLETION_TOKENS, "output_tokens"), - (LLM_USAGE_TOTAL_TOKENS, "total_tokens"), - ]: - if (token_count := token_usage.get(key)) is not None: - yield attribute_name, token_count - if parsed_messages: - yield LLM_OUTPUT_MESSAGES, parsed_messages - if not (llm_output := outputs.get("llm_output")): - return - assert hasattr(llm_output, "get"), ( - f"expected Mapping, found {type(llm_output)}" - ) - if model_name := 
llm_output.get("model_name"): - yield LLM_RESPONSE_MODEL_NAME, model_name - - -@stop_on_exception -def _parse_prompt_data( - prompt_data: Optional[str], -) -> Iterator[Tuple[str, Any]]: - if not prompt_data: - return - assert isinstance(prompt_data, str), ( - f"expected str, found {type(prompt_data)}" - ) - yield CONTENT, process_content(prompt_data) - - -@stop_on_exception -def _parse_message_data( - message_data: Optional[Mapping[str, Any]], -) -> Iterator[Tuple[str, Any]]: - """Parses message data to grab message role, content, etc. - - Example HumanMessage structure: - { - 'additional_kwargs': {'session_id': 's456', 'user_id': 'u123'}, - 'content': '分析这段代码的性能问题', - 'example': False, - 'id': None, - 'name': None, - 'response_metadata': {}, - 'type': 'human' - } - """ - if not message_data: - return - if not hasattr(message_data, "get"): - logger.warning( - f"Invalid message_data type: expected Mapping, found {type(message_data)}" - ) - return - id_ = message_data.get("id") - if not isinstance(id_, list) or not id_: - logger.warning( - f"Invalid message id format: expected non-empty list, found {type(id_)}" + with self._lock: + self._runs[run.id] = rd + + def _start_chain(self, run: Run) -> None: + parent_ctx = self._get_parent_context(run) + span = self._tracer.start_span( + name=f"chain {run.name}", + kind=SpanKind.INTERNAL, + context=parent_ctx, ) - return - message_class_name = id_[-1] - if message_class_name.startswith("HumanMessage"): - role = "user" - elif message_class_name.startswith("AIMessage"): - role = "assistant" - elif message_class_name.startswith("SystemMessage"): - role = "system" - elif message_class_name.startswith("FunctionMessage"): - role = "function" - elif message_class_name.startswith("ToolMessage"): - role = "tool" - elif message_class_name.startswith("ChatMessage"): - kwargs = message_data.get("kwargs", {}) - role = kwargs.get("role", "unknown") - else: - raise ValueError(f"Cannot parse message of type: {message_class_name}") - yield 
MESSAGE_ROLE, role - if kwargs := message_data.get("kwargs"): - assert hasattr(kwargs, "get"), ( - f"expected Mapping, found {type(kwargs)}" + + span.set_attribute(GEN_AI_OPERATION_NAME, "chain") + span.set_attribute(GEN_AI_SPAN_KIND, "CHAIN") + if _should_capture_chain_content(): + inputs = getattr(run, "inputs", None) or {} + span.set_attribute(INPUT_VALUE, _safe_json(inputs)) + + # Attach chain span context so non-LangChain children nest correctly. + ctx = set_span_in_context(span) + token = otel_context.attach(ctx) + + # Propagate inside_langgraph_react from parent so that + # grandchildren of the graph are also recognised as internal. + inside_lg = False + parent_id = getattr(run, "parent_run_id", None) + if parent_id: + with self._lock: + p = self._runs.get(parent_id) + if p is not None: + inside_lg = p.is_langgraph_react or p.inside_langgraph_react + + rd = _RunData( + run_kind="chain", + span=span, + context=ctx, + context_token=token, + inside_langgraph_react=inside_lg, ) - if content := kwargs.get("content"): - if isinstance(content, str): - yield MESSAGE_CONTENT, process_content(content) - elif isinstance(content, list): - # Handle list content (e.g., multimodal content) - for i, obj in enumerate(content): - if isinstance(obj, str): - yield f"{MESSAGE_CONTENT}.{i}", process_content(obj) - elif hasattr(obj, "get"): - yield ( - f"{MESSAGE_CONTENT}.{i}", - process_content(str(obj)), - ) - else: - logger.warning( - f"Unexpected content object type: {type(obj)}" - ) - else: - logger.warning(f"Unexpected content type: {type(content)}") + with self._lock: + self._runs[run.id] = rd - if name := kwargs.get("name"): - if isinstance(name, str): - yield MESSAGE_NAME, name - else: - logger.warning(f"Expected str for name, found {type(name)}") - if additional_kwargs := kwargs.get("additional_kwargs"): - assert hasattr(additional_kwargs, "get"), ( - f"expected Mapping, found {type(additional_kwargs)}" - ) - if function_call := additional_kwargs.get("function_call"): - 
assert hasattr(function_call, "get"), ( - f"expected Mapping, found {type(function_call)}" + def _on_chain_end(self, run: Run) -> None: + with self._lock: + rd = self._runs.pop(run.id, None) + if rd is None: + return + try: + if rd.run_kind == "agent": + self._stop_agent(run, rd) + elif rd.run_kind == "chain": + self._stop_chain(run, rd) + except Exception: + logger.debug("Failed to stop Chain/Agent span", exc_info=True) + + def _stop_agent(self, run: Run, rd: _RunData) -> None: + # End last ReAct step if still active. + # Cannot use _exit_react_step here because rd has already been + # popped from self._runs by _on_chain_end. + if rd.active_step is not None: + step_inv: ReactStepInvocation = rd.active_step.invocation + step_inv.finish_reason = "stop" + self._handler.stop_react_step(step_inv) + rd.active_step = None + + inv: InvokeAgentInvocation = rd.invocation + outputs = getattr(run, "outputs", None) or {} + + # AgentExecutor format + output_val = outputs.get("output") or outputs.get("result") or "" + if isinstance(output_val, str) and output_val: + inv.output_messages = [ + OutputMessage( + role="assistant", + parts=[Text(content=output_val)], + finish_reason="stop", ) - if name := function_call.get("name"): - assert isinstance(name, str), ( - f"expected str, found {type(name)}" - ) - yield MESSAGE_FUNCTION_CALL_NAME, name - if arguments := function_call.get("arguments"): - assert isinstance(arguments, str), ( - f"expected str, found {type(arguments)}" + ] + + # LangGraph format: {"messages": [...]} + if not inv.output_messages: + raw_messages = outputs.get("messages") + if raw_messages and isinstance(raw_messages, list): + last_msg = raw_messages[-1] + content = _extract_message_content(last_msg) + if content: + inv.output_messages = [ + OutputMessage( + role="assistant", + parts=[Text(content=content)], + finish_reason="stop", + ) + ] + + self._handler.stop_invoke_agent(inv) + + def _stop_chain(self, run: Run, rd: _RunData) -> None: + span = rd.span + if span 
is None: + return + if _should_capture_chain_content(): + outputs = getattr(run, "outputs", None) or {} + span.set_attribute(OUTPUT_VALUE, _safe_json(outputs)) + span.end() + _safe_detach(rd.context_token) + + def _on_chain_error(self, run: Run) -> None: + with self._lock: + rd = self._runs.pop(run.id, None) + if rd is None: + return + try: + err_str = getattr(run, "error", None) or "Unknown error" + if rd.run_kind == "agent": + # Fail active step directly (rd already popped from _runs). + if rd.active_step is not None: + step_inv = rd.active_step.invocation + self._handler.fail_react_step( + step_inv, + Error(message=str(err_str), type=Exception), ) - yield MESSAGE_FUNCTION_CALL_ARGUMENTS_JSON, arguments - if tool_calls := additional_kwargs.get("tool_calls"): - assert isinstance(tool_calls, Iterable), ( - f"expected Iterable, found {type(tool_calls)}" + rd.active_step = None + self._handler.fail_invoke_agent( + rd.invocation, + Error(message=str(err_str), type=Exception), ) - message_tool_calls = [] - for tool_call in tool_calls: - if message_tool_call := dict(_get_tool_call(tool_call)): - message_tool_calls.append(message_tool_call) - if message_tool_calls: - yield MESSAGE_TOOL_CALLS, message_tool_calls - - -@stop_on_exception -def _get_tool_call( - tool_call: Optional[Mapping[str, Any]], -) -> Iterator[Tuple[str, Any]]: - if not tool_call: - return - assert hasattr(tool_call, "get"), ( - f"expected Mapping, found {type(tool_call)}" - ) - if function := tool_call.get("function"): - assert hasattr(function, "get"), ( - f"expected Mapping, found {type(function)}" - ) - if name := function.get("name"): - assert isinstance(name, str), f"expected str, found {type(name)}" - yield TOOL_CALL_FUNCTION_NAME, name - if arguments := function.get("arguments"): - assert isinstance(arguments, str), ( - f"expected str, found {type(arguments)}" + elif rd.run_kind == "chain": + span = rd.span + if span is not None: + span.set_status(StatusCode.ERROR, str(err_str)) + 
span.record_exception(Exception(str(err_str))) + span.end() + _safe_detach(rd.context_token) + except Exception: + logger.debug("Failed to fail Chain/Agent span", exc_info=True) + + # ------------------------------------------------------------------ + # Tool hooks + # ------------------------------------------------------------------ + + def _on_tool_start(self, run: Run) -> None: + try: + parent_ctx = self._get_parent_context(run) + inputs = getattr(run, "inputs", None) or {} + input_str = inputs.get("input") or inputs.get("query") or "" + if not isinstance(input_str, str): + input_str = _safe_json(input_str) + extra = getattr(run, "extra", None) or {} + tool_call_id = extra.get("tool_call_id") + invocation = ExecuteToolInvocation( + tool_name=run.name or "unknown_tool", + tool_call_arguments=input_str, + tool_call_id=tool_call_id, + ) + self._handler.start_execute_tool(invocation, context=parent_ctx) + rd = _RunData( + run_kind="tool", + span=invocation.span, + context=set_span_in_context(invocation.span) + if invocation.span + else None, + invocation=invocation, ) - yield TOOL_CALL_FUNCTION_ARGUMENTS_JSON, arguments + with self._lock: + self._runs[run.id] = rd + except Exception: + logger.debug("Failed to start Tool span", exc_info=True) + def _on_tool_end(self, run: Run) -> None: + with self._lock: + rd = self._runs.pop(run.id, None) + if rd is None or rd.run_kind != "tool": + return + try: + inv: ExecuteToolInvocation = rd.invocation + outputs = getattr(run, "outputs", None) or {} + output = outputs.get("output") or "" + if hasattr(output, "content"): + # Extract content from ToolMessage instance + output = output.content + if not isinstance(output, str): + output = _safe_json(output) + inv.tool_call_result = output + self._handler.stop_execute_tool(inv) + except Exception: + logger.debug("Failed to stop Tool span", exc_info=True) -@stop_on_exception -def _prompt_template(run: Run) -> Iterator[Tuple[str, Any]]: - """ - A best-effort attempt to locate the 
PromptTemplate object among the - keyword arguments of a serialized object, e.g. an LLMChain object. - """ - serialized: Optional[Mapping[str, Any]] = run.serialized - if not serialized: - return - assert hasattr(serialized, "get"), ( - f"expected Mapping, found {type(serialized)}" - ) - if not (kwargs := serialized.get("kwargs")): - return - assert isinstance(kwargs, dict), f"expected dict, found {type(kwargs)}" - for obj in kwargs.values(): - if not hasattr(obj, "get") or not (id_ := obj.get("id")): - continue - # The `id` field of the object is a list indicating the path to the - # object's class in the LangChain package, e.g. `PromptTemplate` in - # the `langchain.prompts.prompt` module is represented as - # ["langchain", "prompts", "prompt", "PromptTemplate"] - assert isinstance(id_, Sequence), f"expected list, found {type(id_)}" - if id_[-1].endswith("PromptTemplate"): - if not (kwargs := obj.get("kwargs")): - continue - assert hasattr(kwargs, "get"), ( - f"expected Mapping, found {type(kwargs)}" + def _on_tool_error(self, run: Run) -> None: + with self._lock: + rd = self._runs.pop(run.id, None) + if rd is None or rd.run_kind != "tool": + return + try: + err_str = getattr(run, "error", None) or "Unknown error" + self._handler.fail_execute_tool( + rd.invocation, + Error(message=str(err_str), type=Exception), ) - if not (template := kwargs.get("template", "")): - continue - yield LLM_PROMPT_TEMPLATE, template - if input_variables := kwargs.get("input_variables"): - assert isinstance(input_variables, list), ( - f"expected list, found {type(input_variables)}" - ) - template_variables = {} - for variable in input_variables: - if (value := run.inputs.get(variable)) is not None: - template_variables[variable] = value - if template_variables: - yield ( - LLM_PROMPT_TEMPLATE_VARIABLES, - json.dumps(template_variables, cls=_SafeJSONEncoder), - ) - break - - -@stop_on_exception -def _model_name( - extra: Optional[Mapping[str, Any]], -) -> Iterator[Tuple[str, str]]: - 
"""Yields model name if present.""" - if not extra: - return - assert hasattr(extra, "get"), f"expected Mapping, found {type(extra)}" - if not (invocation_params := extra.get("invocation_params")): - return - for key in ["model_name", "model"]: - if name := invocation_params.get(key): - yield LLM_MODEL_NAME, name + except Exception: + logger.debug("Failed to fail Tool span", exc_info=True) + + # ------------------------------------------------------------------ + # Retriever hooks + # ------------------------------------------------------------------ + + def _on_retriever_start(self, run: Run) -> None: + try: + parent_ctx = self._get_parent_context(run) + inputs = getattr(run, "inputs", None) or {} + query = inputs.get("query") or "" + + invocation = RetrieveInvocation(query=query) + self._handler.start_retrieve(invocation, context=parent_ctx) + rd = _RunData( + run_kind="retriever", + span=invocation.span, + context=set_span_in_context(invocation.span) + if invocation.span + else None, + invocation=invocation, + ) + with self._lock: + self._runs[run.id] = rd + except Exception: + logger.debug("Failed to start Retriever span", exc_info=True) + + def _on_retriever_end(self, run: Run) -> None: + with self._lock: + rd = self._runs.pop(run.id, None) + if rd is None or rd.run_kind != "retriever": return + try: + inv: RetrieveInvocation = rd.invocation + outputs = getattr(run, "outputs", None) or {} + documents = outputs.get("documents") or [] + if documents: + inv.documents = _safe_json(documents) + self._handler.stop_retrieve(inv) + except Exception: + logger.debug("Failed to stop Retriever span", exc_info=True) + def _on_retriever_error(self, run: Run) -> None: + with self._lock: + rd = self._runs.pop(run.id, None) + if rd is None or rd.run_kind != "retriever": + return + try: + err_str = getattr(run, "error", None) or "Unknown error" + self._handler.fail_retrieve( + rd.invocation, + Error(message=str(err_str), type=Exception), + ) + except Exception: + 
logger.debug("Failed to fail Retriever span", exc_info=True) + + # ------------------------------------------------------------------ + # LangGraph ReAct Step — callback-based detection + # ------------------------------------------------------------------ + + def _maybe_enter_langgraph_react_step(self, run: Run) -> None: + """If *run* is a child node of a LangGraph ReAct agent whose name + equals ``LANGGRAPH_REACT_STEP_NODE`` (``"agent"``), trigger a ReAct + step transition: end the previous step (with ``"tool_calls"``) and + start a new one. + + Must be called **before** ``_start_chain`` so that the chain span + is parented under the step span. + """ + parent_id = getattr(run, "parent_run_id", None) + if not parent_id: + return -def get_attr_or_key(obj, name, default=None): - if hasattr(obj, name): - return getattr(obj, name) - if isinstance(obj, dict): - return obj.get(name, default) - return default - - -def extract_token_counts_from_token_usage(token_usage): - mapping = [ - ( - LLM_USAGE_PROMPT_TOKENS, - ["prompt_tokens", "PromptTokens", "input_tokens"], - ), - ( - LLM_USAGE_COMPLETION_TOKENS, - ["completion_tokens", "CompletionTokens", "output_tokens"], - ), - (LLM_USAGE_TOTAL_TOKENS, ["total_tokens", "TotalTokens"]), - ] - for attr, keys in mapping: - for key in keys: - if (value := get_attr_or_key(token_usage, key)) is not None: - yield attr, value - break - - -@stop_on_exception -def _token_counts( - outputs: Optional[Mapping[str, Any]], -) -> Iterator[Tuple[str, int]]: - """Yields token count information if present, from llm_output, generation_info, or response_metadata, only once.""" - if not outputs or not hasattr(outputs, "get"): - return - # 1. 
llm_output - if (llm_output := outputs.get("llm_output", None)) is not None: - if ( - token_usage := get_attr_or_key(llm_output, "token_usage") - ) is not None: - yield from extract_token_counts_from_token_usage(token_usage) - logger.debug("token get from llm_output") + with self._lock: + parent_rd = self._runs.get(parent_id) + if parent_rd is None or not parent_rd.is_langgraph_react: return - # 2. generations - generations = outputs.get("generations") - if generations and isinstance(generations, list): - for group in generations: - if not isinstance(group, list): - continue - for generation in group: - generation_info = get_attr_or_key( - generation, "generation_info" - ) - if token_usage := get_attr_or_key( - generation_info, "token_usage" - ): - yield from extract_token_counts_from_token_usage( - token_usage - ) - logger.debug("token get from generations generation_info") - return - message = get_attr_or_key(generation, "message") - if response_metadata := get_attr_or_key( - message, "response_metadata" - ): - if token_usage := get_attr_or_key( - response_metadata, "token_usage" - ): - yield from extract_token_counts_from_token_usage( - token_usage - ) - logger.debug( - "token get from generations message response_metadata" - ) - return - return - - -@stop_on_exception -def _tools(run: Run) -> Iterator[Tuple[str, str]]: - """Yields tool attributes if present.""" - if run.run_type.lower() != "tool": - return - if not (serialized := run.serialized): - return - assert hasattr(serialized, "get"), ( - f"expected Mapping, found {type(serialized)}" - ) - if name := serialized.get("name"): - yield TOOL_NAME, name - if description := serialized.get("description"): - yield TOOL_DESCRIPTION, description - - -@stop_on_exception -def _retrieval_documents( - run: Run, -) -> Iterator[Tuple[str, List[Mapping[str, Any]]]]: - if run.run_type.lower() != "retriever": - return - if not (outputs := run.outputs): - return - assert hasattr(outputs, "get"), f"expected Mapping, found 
{type(outputs)}" - documents = outputs.get("documents") - assert isinstance(documents, Iterable), ( - f"expected Iterable, found {type(documents)}" - ) - yield ( - RETRIEVAL_DOCUMENTS, - [dict(_as_document(document)) for document in documents], - ) - - -@stop_on_exception -def _metadata(run: Run) -> Iterator[Tuple[str, str]]: - """ - Takes the LangChain chain metadata and adds it to the trace - """ - if not run.extra or not (metadata := run.extra.get("metadata")): - return - if not isinstance(metadata, Mapping): - logger.warning( - f"Invalid metadata type: expected Mapping, found {type(metadata)}" - ) - return - # 获取 session_id - if session_id := ( - metadata.get("session_id") - or metadata.get("conversation_id") - or metadata.get("thread_id") - ): - yield LLM_SESSION_ID, session_id + chain_name = getattr(run, "name", "") or "" + if chain_name != LANGGRAPH_REACT_STEP_NODE: + return + + # End previous step (it had tool_calls since another round started) + if parent_rd.active_step is not None: + self._exit_react_step(parent_id, "tool_calls") + + self._enter_react_step(parent_id) - # 获取 user_id - if user_id := metadata.get("user_id"): - yield LLM_USER_ID, user_id + # ------------------------------------------------------------------ + # ReAct Step — called from patch wrapper or callback detection + # ------------------------------------------------------------------ + + def _enter_react_step(self, agent_run_id: UUID) -> None: + """Create a ReAct Step span and redirect child spans to it.""" + with self._lock: + agent_rd = self._runs.get(agent_run_id) + if agent_rd is None or agent_rd.run_kind != "agent": + return - yield METADATA, json.dumps(metadata) + if agent_rd.original_context is None: + agent_rd.original_context = agent_rd.context + agent_rd.react_round += 1 + inv = ReactStepInvocation(round=agent_rd.react_round) + self._handler.start_react_step(inv, context=agent_rd.original_context) -@stop_on_exception -def _as_document(document: Any) -> Iterator[Tuple[str, 
Any]]: - if page_content := getattr(document, "page_content", None): - assert isinstance(page_content, str), ( - f"expected str, found {type(page_content)}" + step_ctx = ( + set_span_in_context(inv.span) + if inv.span + else agent_rd.original_context ) - yield DOCUMENT_CONTENT, process_content(page_content) - if metadata := getattr(document, "metadata", None): - assert isinstance(metadata, Mapping), ( - f"expected Mapping, found {type(metadata)}" + agent_rd.active_step = _RunData( + run_kind="react_step", + span=inv.span, + context=step_ctx, + invocation=inv, ) - yield DOCUMENT_METADATA, json.dumps(metadata, cls=_SafeJSONEncoder) + agent_rd.context = step_ctx + def _exit_react_step(self, agent_run_id: UUID, finish_reason: str) -> None: + """End the current ReAct Step span and restore Agent context.""" + with self._lock: + agent_rd = self._runs.get(agent_run_id) + if agent_rd is None or agent_rd.active_step is None: + return -class _SafeJSONEncoder(json.JSONEncoder): - """ - A JSON encoder that falls back to the string representation of a - non-JSON-serializable object rather than raising an error. 
- """ + step_inv: ReactStepInvocation = agent_rd.active_step.invocation + step_inv.finish_reason = finish_reason + self._handler.stop_react_step(step_inv) + agent_rd.active_step = None + if agent_rd.original_context is not None: + agent_rd.context = agent_rd.original_context - def default(self, obj: Any) -> Any: - try: - return super().default(obj) - except TypeError: - return str(obj) + def _fail_react_step(self, agent_run_id: UUID, error_msg: str) -> None: + """Fail the current ReAct Step span and restore Agent context.""" + with self._lock: + agent_rd = self._runs.get(agent_run_id) + if agent_rd is None or agent_rd.active_step is None: + return + + step_inv: ReactStepInvocation = agent_rd.active_step.invocation + self._handler.fail_react_step( + step_inv, Error(message=error_msg, type=Exception) + ) + agent_rd.active_step = None + if agent_rd.original_context is not None: + agent_rd.context = agent_rd.original_context + + # ------------------------------------------------------------------ + # Deep copy / copy — return self (shared singleton) + # ------------------------------------------------------------------ + + def __deepcopy__(self, memo: dict) -> LoongsuiteTracer: + return self + def __copy__(self) -> LoongsuiteTracer: + return self -def _as_utc_nano(dt: datetime) -> int: - return int(dt.astimezone(timezone.utc).timestamp() * 1_000_000_000) + +# --------------------------------------------------------------------------- +# LangGraph message helpers (module-level, used by _start_agent / _stop_agent) +# --------------------------------------------------------------------------- + + +def _extract_langgraph_input_message(msg: Any) -> InputMessage | None: + """Convert a LangGraph input message to ``InputMessage``. + + LangGraph inputs may be LangChain message objects, tuples, or dicts. 
+ """ + # Tuple: ("user", "hello") + if isinstance(msg, (list, tuple)) and len(msg) == 2: + role, content = msg + if isinstance(content, str) and content: + return InputMessage(role=str(role), parts=[Text(content=content)]) + return None + + # LangChain message object (HumanMessage, AIMessage, etc.) + content = getattr(msg, "content", None) + if content and isinstance(content, str): + role_map = { + "HumanMessage": "user", + "AIMessage": "assistant", + "SystemMessage": "system", + "ToolMessage": "tool", + } + cls_name = type(msg).__name__ + role = role_map.get(cls_name, "user") + return InputMessage(role=role, parts=[Text(content=content)]) + + return None + + +def _extract_message_content(msg: Any) -> str | None: + """Extract text content from a LangChain message object or dict.""" + content = getattr(msg, "content", None) + if content and isinstance(content, str): + return content + if isinstance(msg, dict): + return msg.get("content") or msg.get("text") + return None diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-langchain/src/opentelemetry/instrumentation/langchain/internal/_utils.py b/instrumentation-loongsuite/loongsuite-instrumentation-langchain/src/opentelemetry/instrumentation/langchain/internal/_utils.py index 4c1994eab..42b4327ba 100644 --- a/instrumentation-loongsuite/loongsuite-instrumentation-langchain/src/opentelemetry/instrumentation/langchain/internal/_utils.py +++ b/instrumentation-loongsuite/loongsuite-instrumentation-langchain/src/opentelemetry/instrumentation/langchain/internal/_utils.py @@ -14,150 +14,410 @@ from __future__ import annotations -import ast +import json import logging -import sys -from os import environ from typing import Any -OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT = ( - "OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT" +from opentelemetry.util.genai.types import ( + FunctionToolDefinition, + InputMessage, + OutputMessage, + Text, + ToolCall, + ToolCallResponse, ) logger = 
logging.getLogger(__name__) +# --------------------------------------------------------------------------- +# Agent detection +# --------------------------------------------------------------------------- -def recursive_size(obj: Any, max_size: int = 10240) -> int: - """递归计算对象大小,超过阈值时快速返回""" - total_size = 0 - if isinstance(obj, dict): - total_size += sys.getsizeof(obj) - if total_size > max_size: - return total_size - for key, value in obj.items(): - total_size += recursive_size( - key, max_size - total_size - ) + recursive_size(value, max_size - total_size) - if total_size > max_size: - return total_size - elif isinstance(obj, list): - total_size += sys.getsizeof(obj) - if total_size > max_size: - return total_size - for item in obj: - total_size += recursive_size(item, max_size - total_size) - if total_size > max_size: - return total_size - else: - total_size += sys.getsizeof(obj) - return total_size - - -def _is_base64_image(item: Any) -> bool: - """检查是否为base64编码的图片数据""" - if not isinstance(item, dict): - return False - if not isinstance(item.get("image_url"), dict): - return False - if "data:image/" not in item.get("image_url", {}).get("url", ""): - return False - return True - - -def _filter_base64_images(obj: Any) -> Any: - """递归过滤掉base64图片数据,保留其他信息""" - # 使用内存大小检测 - 如果数据量不大,直接返回 - # 256x256 图片 base64 约 12K 字符长度,这里设置阈值为 10KB - if recursive_size(obj) < 10240: # 10KB - return obj - - if isinstance(obj, list): - filtered_list = [] - for item in obj: - if isinstance(item, str) and "data:image/" in item: - # 处理字符串中包含base64图片数据的情况 - # 例如: "Human: [{'type': 'text', 'text': '简述这个图片'}, {'type': 'image_url', 'image_url': {'url': 'data:image/jpeg;base64,...'}}]" - start_idx = item.find("[") - end_idx = item.rfind("]") - - if start_idx == -1 or end_idx == -1 or start_idx >= end_idx: - filtered_list.append(item) - continue - - try: - filtered_obj = item[start_idx : end_idx + 1] - # 解析列表 - parsed_list = ast.literal_eval(filtered_obj) - if isinstance(parsed_list, list): - 
# 递归处理解析后的列表 - filtered_parsed_list = _filter_base64_images( - parsed_list - ) - # 替换原字符串中的列表 - filtered_item = ( - item[:start_idx] - + str(filtered_parsed_list) - + item[end_idx + 1 :] - ) - filtered_list.append(filtered_item) - else: - filtered_list.append(item) - except Exception: - # 如果解析失败,保持原样 - filtered_list.append(item) - elif _is_base64_image(item): - # 保留图片信息但不包含base64数据 - filtered_item = { - "type": item.get("type", "image_url"), - "image_url": {"url": "BASE64_IMAGE_DATA_FILTERED"}, - } - filtered_list.append(filtered_item) - else: - filtered_list.append(_filter_base64_images(item)) - return filtered_list - elif isinstance(obj, dict): - filtered_dict = {} - for key, value in obj.items(): - if _is_base64_image(value): - # 如果字典值本身就是base64图片 - filtered_dict[key] = { - "type": value.get("type", "image_url"), - "image_url": {"url": "BASE64_IMAGE_DATA_FILTERED"}, - } +AGENT_RUN_NAMES = frozenset( + { + "AgentExecutor", + "MRKLChain", + "ReActChain", + "ReActTextWorldAgent", + "SelfAskWithSearchChain", + } +) + +_LANGGRAPH_REACT_METADATA_KEY = "_loongsuite_react_agent" + +LANGGRAPH_REACT_STEP_NODE = "agent" + + +def _is_agent_run(run: Any) -> bool: + """Return *True* for classic LangChain agents (name-based check only). + + LangGraph agents are detected separately via metadata — see + ``_has_langgraph_react_metadata`` — because their metadata propagates + to ALL child callbacks and must be disambiguated in the tracer. + """ + name = getattr(run, "name", "") or "" + return name in AGENT_RUN_NAMES + + +def _has_langgraph_react_metadata(run: Any) -> bool: + """Return *True* if *run* carries the LangGraph ReAct agent metadata. + + This flag is injected by ``loongsuite-instrumentation-langgraph`` + into ``config["metadata"]`` when ``Pregel.stream`` is called on a + graph marked with ``_loongsuite_react_agent = True``. + + Note: the metadata propagates to child runs, so the caller must + distinguish the top-level graph from child nodes. 
+ """ + metadata = getattr(run, "metadata", None) or {} + return bool(metadata.get(_LANGGRAPH_REACT_METADATA_KEY)) + + +# --------------------------------------------------------------------------- +# Run data extraction helpers +# --------------------------------------------------------------------------- + + +def _extract_model_name(run: Any) -> str | None: + extra = getattr(run, "extra", None) or {} + params = extra.get("invocation_params") or {} + return ( + params.get("model_name") + or params.get("model") + or params.get("model_id") + ) + + +def _extract_provider(run: Any) -> str: + serialized = getattr(run, "serialized", None) or {} + id_list = serialized.get("id") or [] + if len(id_list) >= 3: + return id_list[2] + return "langchain" + + +def _extract_invocation_params(run: Any) -> dict[str, Any]: + extra = getattr(run, "extra", None) or {} + return extra.get("invocation_params") or {} + + +def _extract_tool_definitions(run: Any) -> list[FunctionToolDefinition]: + """Extract tool definitions from LangChain Run for LLM spans. + + Tools may appear in: + - run.extra["invocation_params"]["tools"] (e.g. from bind_tools) + - run.inputs["tools"] + + Supports OpenAI-style format: {"type": "function", "function": {...}} + and flat format: {"name": ..., "description": ..., "parameters": ...}. 
+ """ + tool_definitions: list[FunctionToolDefinition] = [] + tools: list[Any] = [] + + params = _extract_invocation_params(run) + if params and "tools" in params: + raw = params["tools"] + if isinstance(raw, list): + tools = raw + elif hasattr(raw, "__iter__") and not isinstance(raw, (str, dict)): + tools = list(raw) + + if not tools: + inputs = getattr(run, "inputs", None) or {} + raw = inputs.get("tools") + if isinstance(raw, list): + tools = raw + elif hasattr(raw, "__iter__") and not isinstance(raw, (str, dict)): + tools = list(raw) + + for tool in tools: + if isinstance(tool, FunctionToolDefinition): + tool_definitions.append(tool) + continue + if isinstance(tool, dict): + func = tool.get("function", {}) + if isinstance(func, dict) and func.get("name"): + tool_definitions.append( + FunctionToolDefinition( + name=func.get("name", ""), + description=func.get("description"), + parameters=func.get("parameters"), + type="function", + ) + ) + elif "name" in tool: + tool_definitions.append( + FunctionToolDefinition( + name=tool.get("name", ""), + description=tool.get("description"), + parameters=tool.get("parameters"), + type="function", + ) + ) + elif hasattr(tool, "name") and hasattr(tool, "description"): + tool_definitions.append( + FunctionToolDefinition( + name=getattr(tool, "name", ""), + description=getattr(tool, "description"), + parameters=getattr(tool, "args_schema", None) + or getattr(tool, "parameters", None), + type="function", + ) + ) + + return tool_definitions + + +# --------------------------------------------------------------------------- +# LangChain message ↔ util-genai message conversion +# --------------------------------------------------------------------------- + + +def _convert_lc_message_to_input(msg: Any) -> InputMessage | None: + """Convert a LangChain message dict (dumpd format) to InputMessage.""" + if isinstance(msg, dict): + kwargs = msg.get("kwargs") or {} + role = msg.get("id", ["", "", ""]) + if isinstance(role, list) and 
len(role) >= 3: + role_name = role[-1].lower().replace("message", "") + role_map = { + "human": "user", + "ai": "assistant", + "system": "system", + "function": "tool", + "tool": "tool", + "chat": "user", + } + role_str = role_map.get(role_name, role_name) + else: + role_str = "user" + + content = kwargs.get("content", "") + parts = [] + + if role_str == "tool": + # ToolMessage: use ToolCallResponse with tool_call_id + tool_call_id = kwargs.get("tool_call_id") + if isinstance(content, str): + response_content = content + elif isinstance(content, list): + text_parts = [] + for part in content: + if isinstance(part, dict) and part.get("type") == "text": + text_parts.append(part.get("text", "")) + elif isinstance(part, str): + text_parts.append(part) + response_content = "\n".join(text_parts) if text_parts else "" else: - filtered_dict[key] = _filter_base64_images(value) - return filtered_dict - else: - return obj + response_content = str(content) if content else "" + parts.append( + ToolCallResponse(response=response_content, id=tool_call_id) + ) + else: + if isinstance(content, str) and content: + parts.append(Text(content=content)) + elif isinstance(content, list): + for part in content: + if isinstance(part, dict) and part.get("type") == "text": + parts.append(Text(content=part.get("text", ""))) + elif isinstance(part, str): + parts.append(Text(content=part)) + + tool_calls = kwargs.get("tool_calls") or [] + for tc in tool_calls: + if isinstance(tc, dict): + parts.append( + ToolCall( + name=tc.get("name", ""), + arguments=tc.get("args", {}), + id=tc.get("id"), + ) + ) + if parts: + return InputMessage(role=role_str, parts=parts) + return None -max_content_length = 4 * 1024 +def _extract_llm_input_messages(run: Any) -> list[InputMessage]: + """Extract input messages from a Run's inputs.""" + inputs = getattr(run, "inputs", None) or {} + messages: list[InputMessage] = [] + raw_messages = inputs.get("messages") + if raw_messages: + for batch in raw_messages: + if 
isinstance(batch, list): + for msg in batch: + converted = _convert_lc_message_to_input(msg) + if converted: + messages.append(converted) + if messages: + return messages -def process_content(content: str | None) -> str: - if is_capture_content_enabled(): - if content is not None and len(content) > max_content_length: - content = content[:max_content_length] + "..." - return content - elif content is None: - return "<0size>" - else: - return to_size(content) + prompts = inputs.get("prompts") + if prompts and isinstance(prompts, list): + for p in prompts: + if isinstance(p, str): + messages.append( + InputMessage(role="user", parts=[Text(content=p)]) + ) + return messages + return messages -def to_size(content: str) -> str: - if content is None: - return "<0size>" - size = len(content) - return f"<{size}size>" +def _extract_llm_output_messages(run: Any) -> list[OutputMessage]: + """Extract output messages from a completed Run.""" + outputs = getattr(run, "outputs", None) or {} + result: list[OutputMessage] = [] -def is_capture_content_enabled() -> bool: - capture_content = environ.get( - OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT, "true" + generations = outputs.get("generations") or [] + for gen_list in generations: + if not isinstance(gen_list, list): + continue + for gen in gen_list: + if not isinstance(gen, dict): + continue + text = gen.get("text", "") + parts = [] + if text: + parts.append(Text(content=text)) + + msg_data = gen.get("message") or {} + msg_kwargs = {} + if isinstance(msg_data, dict): + msg_kwargs = msg_data.get("kwargs") or {} + + tool_calls = msg_kwargs.get("tool_calls") or [] + for tc in tool_calls: + if isinstance(tc, dict): + parts.append( + ToolCall( + name=tc.get("name", ""), + arguments=tc.get("args", {}), + id=tc.get("id"), + ) + ) + + finish_reason = (gen.get("generation_info") or {}).get( + "finish_reason", "stop" + ) + if parts: + result.append( + OutputMessage( + role="assistant", + parts=parts, + finish_reason=finish_reason 
or "stop", + ) + ) + return result + + +def _parse_token_usage_dict(token_usage: Any) -> tuple[int | None, int | None]: + """Parse a token_usage/usage dict into (input_tokens, output_tokens).""" + if not isinstance(token_usage, dict): + return None, None + inp = token_usage.get("prompt_tokens") or token_usage.get("input_tokens") + out = token_usage.get("completion_tokens") or token_usage.get( + "output_tokens" + ) + return ( + int(inp) if inp is not None else None, + int(out) if out is not None else None, ) - return is_true_value(capture_content) -def is_true_value(value) -> bool: - return value.lower() in {"1", "y", "yes", "true"} +def _extract_token_usage(run: Any) -> tuple[int | None, int | None]: + """Return (input_tokens, output_tokens) from a completed LLM Run. + + Tries multiple LangChain formats in order: + 1. outputs["llm_output"]["token_usage"] or ["usage"] + 2. generations[i][j]["generation_info"]["token_usage"] or ["usage"] + 3. generations[i][j]["message"].response_metadata or ["kwargs"]["response_metadata"] + """ + outputs = getattr(run, "outputs", None) or {} + + # 1. Primary: llm_output.token_usage / llm_output.usage + llm_output = outputs.get("llm_output") or {} + token_usage = ( + llm_output.get("token_usage") or llm_output.get("usage") or {} + ) + inp, out = _parse_token_usage_dict(token_usage) + if inp is not None or out is not None: + return inp, out + + # 2. Fallback: generations[][].generation_info["token_usage"] or ["usage"] + # 3. 
Fallback: generations[][].message.response_metadata["token_usage"] + for gen_list in outputs.get("generations") or []: + if not isinstance(gen_list, list): + continue + for gen in gen_list: + if not isinstance(gen, dict): + continue + # Try generation_info + gen_info = gen.get("generation_info") or {} + token_usage = ( + gen_info.get("token_usage") or gen_info.get("usage") or {} + ) + inp, out = _parse_token_usage_dict(token_usage) + if inp is not None or out is not None: + return inp, out + # Try message.response_metadata (serialized: kwargs.response_metadata) + msg = gen.get("message") + if msg is None: + continue + if isinstance(msg, dict): + metadata = (msg.get("kwargs") or {}).get( + "response_metadata" + ) or {} + else: + metadata = getattr(msg, "response_metadata", None) or {} + if isinstance(metadata, dict): + token_usage = ( + metadata.get("token_usage") or metadata.get("usage") or {} + ) + inp, out = _parse_token_usage_dict(token_usage) + if inp is not None or out is not None: + return inp, out + + return None, None + + +def _extract_finish_reasons(run: Any) -> list[str] | None: + outputs = getattr(run, "outputs", None) or {} + reasons: list[str] = [] + for gen_list in outputs.get("generations") or []: + if not isinstance(gen_list, list): + continue + for gen in gen_list: + if not isinstance(gen, dict): + continue + info = gen.get("generation_info") or {} + reason = info.get("finish_reason") + if reason: + reasons.append(reason) + return reasons or None + + +def _extract_response_model(run: Any) -> str | None: + outputs = getattr(run, "outputs", None) or {} + llm_output = outputs.get("llm_output") or {} + return llm_output.get("model_name") or llm_output.get("model") + + +# --------------------------------------------------------------------------- +# JSON serialisation helper +# --------------------------------------------------------------------------- + + +def _safe_json(obj: Any, max_len: int = 4096) -> str: + try: + s = json.dumps(obj, 
ensure_ascii=False, default=str) + except Exception: + logger.debug( + "Failed to JSON serialize object, using str()", exc_info=True + ) + s = str(obj) + if len(s) > max_len: + s = s[:max_len] + "...[truncated]" + return s diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-langchain/src/opentelemetry/instrumentation/langchain/internal/patch.py b/instrumentation-loongsuite/loongsuite-instrumentation-langchain/src/opentelemetry/instrumentation/langchain/internal/patch.py new file mode 100644 index 000000000..11b0abb0e --- /dev/null +++ b/instrumentation-loongsuite/loongsuite-instrumentation-langchain/src/opentelemetry/instrumentation/langchain/internal/patch.py @@ -0,0 +1,159 @@ +# Copyright The OpenTelemetry Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +ReAct Step instrumentation patch for AgentExecutor. + +Monkey-patches AgentExecutor._iter_next_step and _aiter_next_step to +create ReAct Step spans via ExtendedTelemetryHandler for each iteration. 
+""" + +from __future__ import annotations + +import logging +from typing import TYPE_CHECKING, Any, Iterator +from uuid import UUID + +if TYPE_CHECKING: + from collections.abc import AsyncIterator + +from opentelemetry.instrumentation.langchain.internal._tracer import ( + LoongsuiteTracer, +) + +logger = logging.getLogger(__name__) + +_REACT_STEP_LOGGER = "opentelemetry.instrumentation.langchain.react_step" + + +def _find_tracer(run_manager: Any) -> LoongsuiteTracer | None: + """Find LoongsuiteTracer from run_manager's handlers.""" + if run_manager is None: + return None + for handlers_attr in ("handlers", "inheritable_handlers"): + handlers = getattr(run_manager, handlers_attr, None) or [] + for h in handlers: + if isinstance(h, LoongsuiteTracer): + return h + return None + + +def _get_agent_run_id(run_manager: Any) -> UUID | None: + """Get the Agent run ID from run_manager.""" + if run_manager is None: + return None + run_id = getattr(run_manager, "run_id", None) + return run_id if isinstance(run_id, UUID) else None + + +def _make_iter_next_step_wrapper(original_fn: Any) -> Any: + """Wrap AgentExecutor._iter_next_step (sync generator).""" + + def patched_iter_next_step( + self: Any, + name_to_tool_map: Any, + color_mapping: Any, + inputs: Any, + intermediate_steps: Any, + run_manager: Any = None, + ) -> Iterator[Any]: + tracer = _find_tracer(run_manager) + agent_run_id = _get_agent_run_id(run_manager) + + if tracer is not None and agent_run_id is not None: + tracer._enter_react_step(agent_run_id) + + has_finish = False + try: + for item in original_fn( + self, + name_to_tool_map, + color_mapping, + inputs, + intermediate_steps, + run_manager, + ): + if _is_agent_finish(item): + has_finish = True + yield item + except Exception as exc: + if tracer is not None and agent_run_id is not None: + tracer._fail_react_step(agent_run_id, str(exc)) + logger.debug( + "ReAct step failed in _iter_next_step: %s", + exc, + exc_info=True, + ) + raise + else: + if tracer is not 
None and agent_run_id is not None: + finish_reason = "stop" if has_finish else "tool_calls" + tracer._exit_react_step(agent_run_id, finish_reason) + + return patched_iter_next_step + + +def _make_aiter_next_step_wrapper(original_fn: Any) -> Any: + """Wrap AgentExecutor._aiter_next_step (async generator).""" + + async def patched_aiter_next_step( + self: Any, + name_to_tool_map: Any, + color_mapping: Any, + inputs: Any, + intermediate_steps: Any, + run_manager: Any = None, + ) -> "AsyncIterator[Any]": + tracer = _find_tracer(run_manager) + agent_run_id = _get_agent_run_id(run_manager) + + if tracer is not None and agent_run_id is not None: + tracer._enter_react_step(agent_run_id) + + has_finish = False + try: + async for item in original_fn( + self, + name_to_tool_map, + color_mapping, + inputs, + intermediate_steps, + run_manager, + ): + if _is_agent_finish(item): + has_finish = True + yield item + except Exception as exc: + if tracer is not None and agent_run_id is not None: + tracer._fail_react_step(agent_run_id, str(exc)) + logger.debug( + "ReAct step failed in _aiter_next_step: %s", + exc, + exc_info=True, + ) + raise + else: + if tracer is not None and agent_run_id is not None: + finish_reason = "stop" if has_finish else "tool_calls" + tracer._exit_react_step(agent_run_id, finish_reason) + + return patched_aiter_next_step + + +def _is_agent_finish(item: Any) -> bool: + """Check if item is AgentFinish without importing at module load.""" + cls = getattr(item, "__class__", None) + if cls is None: + return False + return cls.__name__ == "AgentFinish" diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-langchain/src/opentelemetry/instrumentation/langchain/internal/semconv.py b/instrumentation-loongsuite/loongsuite-instrumentation-langchain/src/opentelemetry/instrumentation/langchain/internal/semconv.py index cae1d1e15..ba3c07805 100644 --- 
"""
Semantic convention attributes for LangChain instrumentation.

Re-exports attributes from ``util-genai`` extended semconv so that the
plugin and its tests have a single import source.
"""

from opentelemetry.semconv._incubating.attributes import (
    gen_ai_attributes as GenAI,
)
from opentelemetry.util.genai._extended_semconv.gen_ai_extended_attributes import (  # noqa: E501
    GEN_AI_RETRIEVAL_DOCUMENTS,
    GEN_AI_RETRIEVAL_QUERY,
    GEN_AI_SPAN_KIND,
    GEN_AI_TOOL_CALL_ARGUMENTS,
    GEN_AI_TOOL_CALL_RESULT,
)

# Aliases into the incubating gen_ai attribute module, kept here so that
# callers never import GenAI directly.
GEN_AI_OPERATION_NAME = GenAI.GEN_AI_OPERATION_NAME
GEN_AI_TOOL_CALL_ID = GenAI.GEN_AI_TOOL_CALL_ID

# Input/Output attributes (used for Chain spans)
INPUT_VALUE = "input.value"
OUTPUT_VALUE = "output.value"

# Public surface of this module; all names above are re-exported.
__all__ = [
    "GEN_AI_OPERATION_NAME",
    "GEN_AI_TOOL_CALL_ID",
    "GEN_AI_RETRIEVAL_DOCUMENTS",
    "GEN_AI_RETRIEVAL_QUERY",
    "GEN_AI_SPAN_KIND",
    "GEN_AI_TOOL_CALL_ARGUMENTS",
    "GEN_AI_TOOL_CALL_RESULT",
    "INPUT_VALUE",
    "OUTPUT_VALUE",
]
# Copyright The OpenTelemetry Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Test configuration for LangChain Instrumentation."""

import os

import pytest

from opentelemetry._logs import set_logger_provider
from opentelemetry.instrumentation.langchain import LangChainInstrumentor
from opentelemetry.sdk._logs import LoggerProvider
from opentelemetry.sdk._logs.export import (
    InMemoryLogExporter,
    SimpleLogRecordProcessor,
)
from opentelemetry.sdk.metrics import MeterProvider
from opentelemetry.sdk.metrics.export import InMemoryMetricReader
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import SimpleSpanProcessor
from opentelemetry.sdk.trace.export.in_memory_span_exporter import (
    InMemorySpanExporter,
)
from opentelemetry.util.genai.environment_variables import (
    OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT,
)


def pytest_configure(config: pytest.Config):
    # Opt the whole test session into the experimental gen_ai semconv.
    os.environ["OTEL_SEMCONV_STABILITY_OPT_IN"] = "gen_ai_latest_experimental"


# ==================== Exporters and Readers ====================


@pytest.fixture(scope="function", name="span_exporter")
def fixture_span_exporter():
    """Fresh in-memory span exporter per test."""
    return InMemorySpanExporter()


@pytest.fixture(scope="function", name="metric_reader")
def fixture_metric_reader():
    """Fresh in-memory metric reader per test."""
    return InMemoryMetricReader()


@pytest.fixture(scope="function", name="log_exporter")
def fixture_log_exporter():
    """Fresh in-memory log exporter per test."""
    return InMemoryLogExporter()


# ==================== Providers ====================


@pytest.fixture(scope="function", name="tracer_provider")
def fixture_tracer_provider(span_exporter):
    """TracerProvider wired to the in-memory span exporter."""
    provider = TracerProvider()
    provider.add_span_processor(SimpleSpanProcessor(span_exporter))
    return provider


@pytest.fixture(scope="function", name="meter_provider")
def fixture_meter_provider(metric_reader):
    """MeterProvider wired to the in-memory metric reader."""
    return MeterProvider(metric_readers=[metric_reader])


@pytest.fixture(scope="function", name="logger_provider")
def fixture_logger_provider(log_exporter):
    """LoggerProvider wired to the in-memory log exporter.

    Also installs itself as the global logger provider, matching the
    behaviour the instrumentation expects at runtime.
    """
    provider = LoggerProvider()
    provider.add_log_record_processor(SimpleLogRecordProcessor(log_exporter))
    set_logger_provider(provider)
    return provider


# ==================== Instrumentation Fixtures ====================


def _instrumented(
    capture_mode,
    tracer_provider,
    meter_provider,
    logger_provider,
    span_exporter,
):
    """Shared generator body for the instrument fixtures.

    Sets the content-capture env var to *capture_mode*, instruments
    LangChain, yields the instrumentor, and — in a ``finally`` so that a
    failing teardown step cannot leak the env var or exported spans —
    uninstruments, clears the exporter, and restores the environment.
    """
    os.environ[OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT] = (
        capture_mode
    )
    instrumentor = LangChainInstrumentor()
    instrumentor.instrument(
        tracer_provider=tracer_provider,
        meter_provider=meter_provider,
        logger_provider=logger_provider,
    )
    try:
        yield instrumentor
    finally:
        instrumentor.uninstrument()
        span_exporter.clear()
        os.environ.pop(
            OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT, None
        )


@pytest.fixture(scope="function")
def instrument(
    tracer_provider, meter_provider, logger_provider, span_exporter
):
    """Instrument with content capture enabled (SPAN_ONLY)."""
    yield from _instrumented(
        "SPAN_ONLY",
        tracer_provider,
        meter_provider,
        logger_provider,
        span_exporter,
    )


@pytest.fixture(scope="function")
def instrument_no_content(
    tracer_provider, meter_provider, logger_provider, span_exporter
):
    """Instrument without capturing message content."""
    yield from _instrumented(
        "NO_CONTENT",
        tracer_provider,
        meter_provider,
        logger_provider,
        span_exporter,
    )
- -from unittest.mock import patch - -import pytest - -from opentelemetry.instrumentation.langchain.internal._utils import ( - _filter_base64_images, - _is_base64_image, - recursive_size, -) - - -class TestBase64Filter: - """测试base64图片过滤功能""" - - def test_is_base64_image(self): - """测试base64图片检测功能""" - base64_image = { - "type": "image_url", - "image_url": { - "url": "data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQABA" - + "a" * 12140 - }, - } - normal_image = { - "type": "image_url", - "image_url": {"url": "http://example.com/image.jpg"}, - } - - assert _is_base64_image(base64_image) is True - assert _is_base64_image(normal_image) is False - assert _is_base64_image({"text": "Hello world"}) is False - assert _is_base64_image("not a dict") is False - assert _is_base64_image(None) is False - - def test_recursive_size(self): - """测试递归大小计算功能""" - assert recursive_size("hello") > 0 - assert recursive_size([1, 2, 3]) > 0 - assert recursive_size({"a": 1, "b": 2}) > 0 - assert ( - recursive_size({"level1": {"level2": {"data": [1, 2, 3, 4, 5]}}}) - > 0 - ) - assert recursive_size({"key": "x" * 20000}) > 10240 - - def test_recursive_size_early_return(self): - """测试recursive_size函数中的早退分支""" - # 创建一个大的字典以触发早退 - large_dict = {f"key{i}": f"value{i}" * 100 for i in range(100)} - size = recursive_size(large_dict, max_size=100) - assert size > 100 - - # 创建一个大的列表以触发早退 - large_list = [f"item{i}" * 100 for i in range(100)] - size = recursive_size(large_list, max_size=100) - assert size > 100 - - @pytest.mark.parametrize( - "test_input,expected_filtered", - [ - ( - { - "prompts": [ - "Hello, this is a normal text without base64 images" - ] - }, - { - "prompts": [ - "Hello, this is a normal text without base64 images" - ] - }, - ), - # 包含小base64数据的情况 - ( - { - "messages": [ - { - "content": [ - {"type": "text", "text": "简述这个图片"}, - { - "type": "image_url", - "image_url": { - "url": "data:image/jpeg;base64,/9j/4AAQ" - }, - }, - ], - "role": "user", - } - ], - "model": "qwen-vl-max", - 
"stream": False, - }, - { - "messages": [ - { - "content": [ - {"type": "text", "text": "简述这个图片"}, - { - "type": "image_url", - "image_url": { - "url": "data:image/jpeg;base64,/9j/4AAQ" - }, - }, - ], - "role": "user", - } - ], - "model": "qwen-vl-max", - "stream": False, - }, - ), - # 列表中的base64图片过滤 - ( - [ - {"type": "text", "text": "简述这个图片"}, - { - "type": "image_url", - "image_url": { - "url": "data:image/jpeg;base64,/9j/4AAQSkZJR" - + "a" * 12140 - }, - }, - ], - [ - {"type": "text", "text": "简述这个图片"}, - { - "type": "image_url", - "image_url": {"url": "BASE64_IMAGE_DATA_FILTERED"}, - }, - ], - ), - # 字符串中的base64图片过滤 - ( - { - "prompts": [ - "Human: [{'type': 'text', 'text': '简述这个图片'}, {'type': 'image_url', 'image_url': {'url': 'data:image/jpeg;base64,/9j/4AAQ+" - + "a" * 12140 - + "'}}]" - ] - }, - { - "prompts": [ - "Human: [{'type': 'text', 'text': '简述这个图片'}, {'type': 'image_url', 'image_url': {'url': 'BASE64_IMAGE_DATA_FILTERED'}}]" - ] - }, - ), - # 边界情况 - (None, None), - ([], []), - ({}, {}), - ("string", "string"), - (123, 123), - # 混合数据 - ( - { - "normal_text": "Hello", - "base64_image": { - "type": "image_url", - "image_url": { - "url": "data:image/jpeg;base64,/9j/SkZJRgABA" - + "a" * 12140 - }, - }, - "normal_dict": {"key": "value"}, - }, - { - "normal_text": "Hello", - "base64_image": { - "type": "image_url", - "image_url": {"url": "BASE64_IMAGE_DATA_FILTERED"}, - }, - "normal_dict": {"key": "value"}, - }, - ), - # 多个base64图片 - ( - { - "content": [ - {"type": "text", "text": "第一张图片"}, - { - "type": "image_url", - "image_url": { - "url": "data:image/jpeg;base64,/9j/4AAQSkZJRA" - + "a" * 12140 - }, - }, - {"type": "text", "text": "第二张图片"}, - { - "type": "image_url", - "image_url": { - "url": "data:image/png;base64,iVBORw0KGgoA" - + "a" * 12140 - }, - }, - ] - }, - { - "content": [ - {"type": "text", "text": "第一张图片"}, - { - "type": "image_url", - "image_url": {"url": "BASE64_IMAGE_DATA_FILTERED"}, - }, - {"type": "text", "text": "第二张图片"}, - { - "type": 
"image_url", - "image_url": {"url": "BASE64_IMAGE_DATA_FILTERED"}, - }, - ] - }, - ), - ], - ) - def test_filter_base64_images(self, test_input, expected_filtered): - """测试base64图片过滤功能""" - result = _filter_base64_images(test_input) - assert result == expected_filtered - - def test_filter_base64_images_large_data(self): - """测试大数据量进行过滤""" - large_data = { - "messages": [ - { - "content": [ - {"type": "text", "text": "简述这个图片"}, - { - "type": "image_url", - "image_url": { - "url": "data:image/jpeg;base64,/9j/4AAQ" - }, - }, - ], - "role": "user", - } - ], - "model": "qwen-vl-max", - "stream": False, - } - large_data["messages"][0]["content"][1]["image_url"]["url"] = ( - "data:image/jpeg;base64,/9j/4AAQSk" + "a" * 12140 - ) - filtered = _filter_base64_images(large_data) - assert filtered != large_data - assert ( - filtered["messages"][0]["content"][1]["image_url"]["url"] - == "BASE64_IMAGE_DATA_FILTERED" - ) - - def test_filter_base64_images_early_return(self): - """测试_filter_base64_images函数中的早退分支""" - # 测试没有找到'['或']'的情况 - miss_left = { - "prompts": [ - "Human: {'type': 'text', 'text': '简述这个图片'}, {'type': 'image_url', 'image_url': {'url': 'data:image/jpeg;base64,/9j/4AAQ+" - + "a" * 12140 - + "'}}]" - ] - } - result = _filter_base64_images(miss_left) - assert result == miss_left - miss_right = { - "prompts": [ - "Human: [{'type': 'text', 'text': '简述这个图片'}, {'type': 'image_url', 'image_url': {'url': 'data:image/jpeg;base64,/9j/4AAQ+" - + "a" * 12140 - + "'}}" - ] - } - result = _filter_base64_images(miss_right) - assert result == miss_right - - def test_filter_base64_images_early_return_and_exception(self): - """测试_filter_base64_images函数中的早退分支和异常处理""" - - # 创建一个大的对象以绕过早期大小检查,但格式无效以触发异常 - miss_right = { - "prompts": [ - "Human: [{'type': 'text', 'text': '简述这个图片'}, {'type': 'image_url', 'image_url': {'url': 'data:image/jpeg;base64,/9j/4AAQ+" - + "a" * 20000 - + "'}}" - + "a" * 10000 - ] - } - result = _filter_base64_images(miss_right) - # 应该返回原始对象,因为解析失败时保持原样 - assert 
result == miss_right - - # 新增mock测试用例 - def test_filter_base64_images_with_syntax_error_mock(self): - """使用mock模拟ast.literal_eval抛出SyntaxError异常""" - test_input = { - "prompts": [ - "Human: [{'type': 'text', 'text': '简述这个图片'}, {'type': 'image_url', 'image_url': {'url': 'data:image/jpeg;base64,/9j/4AAQ+" - + "a" * 12140 - + "'}}]" - ] - } - - with patch( - "opentelemetry.instrumentation.langchain.internal._utils.ast.literal_eval" - ) as mock_literal_eval: - mock_literal_eval.side_effect = SyntaxError("mocked syntax error") - result = _filter_base64_images(test_input) - # 应该返回原始对象,因为解析失败时保持原样 - assert result == test_input - - def test_filter_base64_images_unexpected_type(self): - """测试_filter_base64_images函数中的非期望类型直接返回""" - - miss = ( - "Human: [{'type': 'text', 'text': '简述这个图片'}, {'type': 'image_url', 'image_url': {'url': 'data:image/jpeg;base64,/9j/4AAQ+" - + "a" * 20000 - + "'}}" - + "a" * 10000 - ) - result = _filter_base64_images(miss) - # 应该返回原始对象,因为解析失败时保持原样 - assert result == miss diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-langchain/tests/instrumentation/langchain/test_langchain_instrumentor.py b/instrumentation-loongsuite/loongsuite-instrumentation-langchain/tests/instrumentation/langchain/test_langchain_instrumentor.py deleted file mode 100644 index 4931497b4..000000000 --- a/instrumentation-loongsuite/loongsuite-instrumentation-langchain/tests/instrumentation/langchain/test_langchain_instrumentor.py +++ /dev/null @@ -1,668 +0,0 @@ -# Copyright The OpenTelemetry Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import annotations - -import asyncio -import json -import logging -import os -import random -from contextlib import suppress -from itertools import count -from typing import ( - Any, - AsyncIterator, - Dict, - Generator, - Iterable, - Iterator, - List, - Tuple, -) - -import numpy as np -import openai -import pytest -from httpx import AsyncByteStream, Response, SyncByteStream -from langchain.chains import RetrievalQA -from langchain_community.embeddings import FakeEmbeddings -from langchain_community.retrievers import KNNRetriever -from langchain_core.prompts import PromptTemplate -from langchain_openai import ChatOpenAI -from respx import MockRouter - -from opentelemetry import trace as trace_api -from opentelemetry.instrumentation.langchain import LangChainInstrumentor -from opentelemetry.instrumentation.langchain.internal.semconv import ( - CONTENT, - DOCUMENT_CONTENT, - INPUT_MIME_TYPE, - INPUT_VALUE, - LLM_MODEL_NAME, - LLM_OUTPUT_MESSAGES, - LLM_PROMPT_TEMPLATE, - LLM_PROMPT_TEMPLATE_VARIABLES, - LLM_PROMPTS, - LLM_RESPONSE_FINISH_REASON, - LLM_RESPONSE_MODEL_NAME, - LLM_SPAN_KIND, - LLM_USAGE_COMPLETION_TOKENS, - LLM_USAGE_PROMPT_TOKENS, - LLM_USAGE_TOTAL_TOKENS, - MESSAGE_CONTENT, - MESSAGE_ROLE, - METADATA, - OUTPUT_MIME_TYPE, - OUTPUT_VALUE, - RETRIEVAL_DOCUMENTS, - MimeTypeValues, - SpanKindValues, -) -from opentelemetry.sdk import trace as trace_sdk -from opentelemetry.sdk.metrics import MeterProvider -from opentelemetry.sdk.metrics.export import InMemoryMetricReader -from opentelemetry.sdk.trace.export import SimpleSpanProcessor -from opentelemetry.sdk.trace.export.in_memory_span_exporter import ( - InMemorySpanExporter, -) - -for name, logger in logging.root.manager.loggerDict.items(): - if name.startswith("opentelemetry.") and isinstance( - logger, logging.Logger - ): - logger.setLevel(logging.DEBUG) - logger.handlers.clear() - 
logger.addHandler(logging.StreamHandler()) - - -@pytest.mark.parametrize("is_async", [False, True]) -@pytest.mark.parametrize("is_stream", [False, True]) -@pytest.mark.parametrize("status_code", [200, 400]) -def test_callback_llm( - is_async: bool, - is_stream: bool, - status_code: int, - respx_mock: MockRouter, - in_memory_span_exporter: InMemorySpanExporter, - documents: List[str], - chat_completion_mock_stream: Tuple[List[bytes], List[Dict[str, Any]]], - model_name: str, - completion_usage: Dict[str, Any], -) -> None: - question = randstr() - template = "{context}{question}" - prompt = PromptTemplate( - input_variables=["context", "question"], template=template - ) - output_messages: List[Dict[str, Any]] = ( - chat_completion_mock_stream[1] - if is_stream - else [{"role": randstr(), "content": randstr()}] - ) - url = "https://api.openai.com/v1/chat/completions" - respx_kwargs: Dict[str, Any] = { - **( - {"stream": MockByteStream(chat_completion_mock_stream[0])} - if is_stream - else { - "json": { - "choices": [ - { - "index": i, - "message": message, - "finish_reason": "stop", - } - for i, message in enumerate(output_messages) - ], - "model": model_name, - "usage": completion_usage, - } - } - ), - } - respx_mock.post(url).mock( - return_value=Response(status_code=status_code, **respx_kwargs) - ) - chat_model = ChatOpenAI(model_name="gpt-3.5-turbo", streaming=is_stream) # type: ignore - retriever = KNNRetriever( - index=np.ones((len(documents), 2)), - texts=documents, - embeddings=FakeEmbeddings(size=2), - ) - rqa = RetrievalQA.from_chain_type( - llm=chat_model, - retriever=retriever, - chain_type_kwargs={"prompt": prompt}, - ) - with suppress(openai.BadRequestError): - if is_async: - asyncio.run(rqa.ainvoke({"query": question})) - else: - rqa.invoke({"query": question}) - - spans = in_memory_span_exporter.get_finished_spans() - spans_by_name = {span.name: span for span in spans} - - assert (rqa_span := spans_by_name.pop("RetrievalQA")) is not None - assert 
rqa_span.parent is None - rqa_attributes = dict(rqa_span.attributes or {}) - assert ( - rqa_attributes.pop(LLM_SPAN_KIND, None) == SpanKindValues.CHAIN.value - ) - assert rqa_attributes.pop(INPUT_VALUE, None) == question - if status_code == 200: - assert rqa_span.status.status_code == trace_api.StatusCode.OK - assert ( - rqa_attributes.pop(OUTPUT_VALUE, None) - == output_messages[0]["content"] - ) - elif status_code == 400: - assert rqa_span.status.status_code == trace_api.StatusCode.ERROR - assert rqa_span.events[0].name == "exception" - assert rqa_attributes == {} - - assert (sd_span := spans_by_name.pop("StuffDocumentsChain")) is not None - assert sd_span.parent is not None - assert sd_span.parent.span_id == rqa_span.context.span_id - assert sd_span.context.trace_id == rqa_span.context.trace_id - sd_attributes = dict(sd_span.attributes or {}) - assert sd_attributes.pop(LLM_SPAN_KIND, None) == SpanKindValues.CHAIN.value - assert sd_attributes.pop(INPUT_VALUE, None) is not None - assert ( - sd_attributes.pop(INPUT_MIME_TYPE, None) == MimeTypeValues.JSON.value - ) - if status_code == 200: - assert sd_span.status.status_code == trace_api.StatusCode.OK - assert ( - sd_attributes.pop(OUTPUT_VALUE, None) - == output_messages[0]["content"] - ) - elif status_code == 400: - assert sd_span.status.status_code == trace_api.StatusCode.ERROR - assert sd_attributes == {} - - retriever_span = None - for name in ["Retriever", "KNNRetriever"]: - if name in spans_by_name: - retriever_span = spans_by_name.pop(name) - break - assert retriever_span is not None - assert retriever_span.parent is not None - assert retriever_span.parent.span_id == rqa_span.context.span_id - assert retriever_span.context.trace_id == rqa_span.context.trace_id - retriever_attributes = dict(retriever_span.attributes or {}) - assert ( - retriever_attributes.pop(LLM_SPAN_KIND, None) - == SpanKindValues.RETRIEVER.value - ) - assert retriever_attributes.pop(INPUT_VALUE, None) == question - assert 
retriever_attributes.pop(OUTPUT_VALUE, None) is not None - assert ( - retriever_attributes.pop(OUTPUT_MIME_TYPE, None) - == MimeTypeValues.JSON.value - ) - for i, text in enumerate(documents): - assert ( - retriever_attributes.pop( - f"{RETRIEVAL_DOCUMENTS}.{i}.{DOCUMENT_CONTENT}", None - ) - == text - ) - allowed_extra = {"metadata"} - assert ( - not retriever_attributes - or set(retriever_attributes.keys()) <= allowed_extra - ) - - assert (llm_span := spans_by_name.pop("LLMChain", None)) is not None - assert llm_span.parent is not None - assert llm_span.parent.span_id == sd_span.context.span_id - assert llm_span.context.trace_id == sd_span.context.trace_id - llm_attributes = dict(llm_span.attributes or {}) - - input_value = llm_attributes.get(INPUT_VALUE) - assert input_value is not None - for var in ["question", "context"]: - assert var in input_value - - if LLM_PROMPT_TEMPLATE in llm_attributes: - assert llm_attributes.pop(LLM_PROMPT_TEMPLATE, None) == template - template_variables_json_string = llm_attributes.pop( - LLM_PROMPT_TEMPLATE_VARIABLES, None - ) - assert isinstance(template_variables_json_string, str) - assert json.loads(template_variables_json_string) == { - "context": "\n\n".join(documents), - "question": question, - } - assert ( - llm_attributes.pop(LLM_SPAN_KIND, None) == SpanKindValues.CHAIN.value - ) - assert ( - llm_attributes.pop(INPUT_MIME_TYPE, None) == MimeTypeValues.JSON.value - ) - - if status_code == 200: - assert ( - llm_attributes.pop(OUTPUT_VALUE, None) - == output_messages[0]["content"] - ) - elif status_code == 400: - assert llm_span.status.status_code == trace_api.StatusCode.ERROR - - assert (oai_span := spans_by_name.pop("ChatOpenAI", None)) is not None - assert oai_span.parent is not None - assert oai_span.parent.span_id == llm_span.context.span_id - assert oai_span.context.trace_id == llm_span.context.trace_id - oai_attributes = dict(oai_span.attributes or {}) - assert oai_attributes.pop(LLM_SPAN_KIND, None) == 
SpanKindValues.LLM.value - assert oai_attributes.pop(LLM_MODEL_NAME, None) is not None - assert oai_attributes.pop(INPUT_VALUE, None) is not None - assert ( - oai_attributes.pop(INPUT_MIME_TYPE, None) == MimeTypeValues.JSON.value - ) - assert oai_attributes.pop(LLM_PROMPTS + ".0." + CONTENT, None) is not None - if oai_attributes.__contains__(METADATA): - assert oai_attributes.pop(METADATA) - if status_code == 200: - assert oai_span.status.status_code == trace_api.StatusCode.OK - assert oai_attributes.pop(OUTPUT_VALUE, None) is not None - assert ( - oai_attributes.pop(OUTPUT_MIME_TYPE, None) - == MimeTypeValues.JSON.value - ) - assert ( - oai_attributes.pop(f"{LLM_OUTPUT_MESSAGES}.0.{MESSAGE_ROLE}", None) - == output_messages[0]["role"] - ) - assert ( - oai_attributes.pop( - f"{LLM_OUTPUT_MESSAGES}.0.{MESSAGE_CONTENT}", None - ) - == output_messages[0]["content"] - ) - if not is_stream: - assert ( - oai_attributes.pop(LLM_RESPONSE_FINISH_REASON, None) == "stop" - ) - oai_attributes.pop(LLM_RESPONSE_MODEL_NAME) - assert ( - oai_attributes.pop(LLM_USAGE_TOTAL_TOKENS, None) - == completion_usage["total_tokens"] - ) - assert ( - oai_attributes.pop(LLM_USAGE_PROMPT_TOKENS, None) - == completion_usage["prompt_tokens"] - ) - assert ( - oai_attributes.pop(LLM_USAGE_COMPLETION_TOKENS, None) - == completion_usage["completion_tokens"] - ) - elif status_code == 400: - assert oai_span.status.status_code == trace_api.StatusCode.ERROR - - assert spans_by_name == {} - - -@pytest.mark.parametrize("is_async", [False, True]) -@pytest.mark.parametrize("is_stream", [False, True]) -@pytest.mark.parametrize("status_code", [200, 400]) -def test_llm_metrics( - is_async: bool, - is_stream: bool, - status_code: int, - respx_mock: MockRouter, - in_memory_metric_reader: InMemoryMetricReader, - documents: List[str], - chat_completion_mock_stream: Tuple[List[bytes], List[Dict[str, Any]]], - model_name: str, - completion_usage: Dict[str, Any], -) -> None: - question = randstr() - template = 
"{context}{question}" - prompt = PromptTemplate( - input_variables=["context", "question"], template=template - ) - output_messages: List[Dict[str, Any]] = ( - chat_completion_mock_stream[1] - if is_stream - else [{"role": randstr(), "content": randstr()}] - ) - url = "https://api.openai.com/v1/chat/completions" - respx_kwargs: Dict[str, Any] = { - **( - {"stream": MockByteStream(chat_completion_mock_stream[0])} - if is_stream - else { - "json": { - "choices": [ - { - "index": i, - "message": message, - "finish_reason": "stop", - } - for i, message in enumerate(output_messages) - ], - "model": model_name, - "usage": completion_usage, - } - } - ), - } - respx_mock.post(url).mock( - return_value=Response(status_code=status_code, **respx_kwargs) - ) - chat_model = ChatOpenAI(model_name="gpt-3.5-turbo", streaming=is_stream) # type: ignore - retriever = KNNRetriever( - index=np.ones((len(documents), 2)), - texts=documents, - embeddings=FakeEmbeddings(size=2), - ) - rqa = RetrievalQA.from_chain_type( - llm=chat_model, - retriever=retriever, - chain_type_kwargs={"prompt": prompt}, - ) - with suppress(openai.BadRequestError): - if is_async: - asyncio.run(rqa.ainvoke({"query": question})) - else: - rqa.invoke({"query": question}) - metric_data = in_memory_metric_reader.get_metrics_data() - if metric_data is None or not metric_data.resource_metrics: - # Skip metrics validation if no metrics data is available - return - metric_list = metric_data.resource_metrics[0].scope_metrics[0].metrics - for metric in metric_list: - if metric.name == "genai_llm_usage_tokens": - assert len(metric.data.data_points) == 2 - if metric.name == "genai_calls_count": - assert len(metric.data.data_points) == 3 - if metric.name == "genai_calls_duration_seconds": - assert len(metric.data.data_points) == 3 - for datapoint in metric.data.data_points: - attributes = datapoint.attributes - assert "callType" in attributes.keys() - assert attributes["callType"] == "gen_ai" - assert "callKind" in 
attributes.keys() - assert attributes["callKind"] == "custom_entry" - assert "rpcType" in attributes.keys() - assert attributes["rpcType"] == 2100 - if metric.name == "genai_llm_usage_tokens": - assert "modelName" in attributes.keys() - assert "usageType" in attributes.keys() - - -def test_chain_metadata( - respx_mock: MockRouter, - in_memory_span_exporter: InMemorySpanExporter, - completion_usage: Dict[str, Any], -) -> None: - url = "https://api.openai.com/v1/chat/completions" - respx_kwargs: Dict[str, Any] = { - "json": { - "choices": [ - { - "index": 0, - "message": {"role": "assistant", "content": "nock nock"}, - "finish_reason": "stop", - } - ], - "model": "gpt-3.5-turbo", - "usage": completion_usage, - } - } - respx_mock.post(url).mock( - return_value=Response(status_code=200, **respx_kwargs) - ) - prompt_template = "Tell me a {adjective} joke" - prompt = PromptTemplate( - input_variables=["adjective"], template=prompt_template - ) - llm = ChatOpenAI() - chain = prompt | llm - chain = chain.with_config({"metadata": {"category": "jokes"}}) - chain.invoke({"adjective": "funny"}) - spans = in_memory_span_exporter.get_finished_spans() - spans_by_name = {span.name: span for span in spans} - - assert ( - llm_chain_span := spans_by_name.pop("RunnableSequence") - ) is not None - assert llm_chain_span.attributes - assert llm_chain_span.attributes.get(METADATA) == '{"category": "jokes"}' - - -def test_callback_llm_exception_event( - respx_mock, - in_memory_span_exporter, - documents, - chat_completion_mock_stream, - model_name, - completion_usage, -): - """ - 用特殊的mock触发异常 - """ - - # 用自定义异常,避免openai.BadRequestError构造问题 - class MyCustomError(Exception): - pass - - class ErrorLLM(ChatOpenAI): - def _generate(self, *args, **kwargs): - raise MyCustomError("mock error") - - prompt = PromptTemplate( - input_variables=["question"], template="{question}" - ) - llm = ErrorLLM() - chain = prompt | llm - with pytest.raises(MyCustomError): - chain.invoke({"question": "test?"}) - 
spans = in_memory_span_exporter.get_finished_spans() - for span in spans: - if span.name == "RunnableSequence": - from opentelemetry import trace as trace_api # noqa: PLC0415 - - assert span.status.status_code == trace_api.StatusCode.ERROR - assert span.events[0].name == "exception" - assert "MyCustomError" in span.events[0].attributes.get( - "exception.type", "" - ) - break - else: - assert False, "No RunnableSequence span found" - - -def test_environment_control_comprehensive( - respx_mock, - in_memory_span_exporter, - in_memory_metric_reader, - documents, - chat_completion_mock_stream, - model_name, - completion_usage, -): - """ - 测试环境变量控制的完整分支,确保所有环境变量分支都被覆盖 - """ - # 统一mock - url = "https://api.openai.com/v1/chat/completions" - respx_mock.post(url).mock( - return_value=Response( - 200, - json={ - "choices": [ - { - "index": 0, - "message": { - "role": "assistant", - "content": "test response", - }, - "finish_reason": "stop", - } - ], - "model": "gpt-3.5-turbo", - "usage": completion_usage, - }, - ) - ) - - original_env = os.getenv("ENABLE_LANGCHAIN_INSTRUMENTOR") - - os.environ["ENABLE_LANGCHAIN_INSTRUMENTOR"] = "FALSE" - try: - prompt = PromptTemplate( - input_variables=["question"], template="{question}" - ) - llm = ChatOpenAI(model_name="gpt-3.5-turbo") - chain = prompt | llm - chain.invoke({"question": "test?"}) - spans = in_memory_span_exporter.get_finished_spans() - assert len(spans) == 0 - finally: - if original_env is None: - os.environ.pop("ENABLE_LANGCHAIN_INSTRUMENTOR", None) - else: - os.environ["ENABLE_LANGCHAIN_INSTRUMENTOR"] = original_env - - os.environ["ENABLE_LANGCHAIN_INSTRUMENTOR"] = "True" - prompt = PromptTemplate( - input_variables=["question"], template="{question}" - ) - llm = ChatOpenAI(model_name="gpt-3.5-turbo") - chain = prompt | llm - - _ = chain.invoke({"question": "test?"}) - spans = in_memory_span_exporter.get_finished_spans() - assert len(spans) > 0 - - -@pytest.fixture -def documents() -> List[str]: - return [randstr(), 
randstr()] - - -@pytest.fixture -def chat_completion_mock_stream() -> Tuple[List[bytes], List[Dict[str, Any]]]: - return ( - [ - b'data: {"choices": [{"delta": {"role": "assistant"}, "index": 0}]}\n\n', - b'data: {"choices": [{"delta": {"content": "A"}, "index": 0}]}\n\n', - b'data: {"choices": [{"delta": {"content": "B"}, "index": 0}]}\n\n', - b'data: {"choices": [{"delta": {"content": "C"}, "index": 0}]}\n\n', - b"data: [DONE]\n", - ], - [{"role": "assistant", "content": "ABC"}], - ) - - -@pytest.fixture(scope="module") -def in_memory_span_exporter() -> InMemorySpanExporter: - return InMemorySpanExporter() - - -@pytest.fixture(scope="module") -def in_memory_metric_reader() -> InMemoryMetricReader: - return InMemoryMetricReader() - - -@pytest.fixture(scope="module") -def tracer_provider( - in_memory_span_exporter: InMemorySpanExporter, -) -> trace_api.TracerProvider: - tracer_provider = trace_sdk.TracerProvider() - tracer_provider.add_span_processor( - SimpleSpanProcessor(in_memory_span_exporter) - ) - return tracer_provider - - -@pytest.fixture(scope="module") -def meter_provider( - in_memory_metric_reader: InMemoryMetricReader, -) -> MeterProvider: - meter_provider = MeterProvider(metric_readers=[in_memory_metric_reader]) - return meter_provider - - -@pytest.fixture(autouse=True) -def instrument( - tracer_provider: trace_api.TracerProvider, - in_memory_span_exporter: InMemorySpanExporter, - meter_provider: MeterProvider, - in_memory_metric_reader: InMemoryMetricReader, -) -> Generator[None, None, None]: - LangChainInstrumentor().instrument( - tracer_provider=tracer_provider, meter_provider=meter_provider - ) - yield - LangChainInstrumentor().uninstrument() - in_memory_span_exporter.clear() - in_memory_metric_reader.force_flush() - - -@pytest.fixture(autouse=True) -def openai_api_key(monkeypatch: pytest.MonkeyPatch) -> None: - monkeypatch.setenv("OPENAI_API_KEY", "sk-") - - -@pytest.fixture(scope="module") -def seed() -> Iterator[int]: - """ - Use rolling seeds 
to help debugging, because the rolling pseudo-random values - allow conditional breakpoints to be hit precisely (and repeatably). - """ - return count() - - -@pytest.fixture(autouse=True) -def set_seed(seed: Iterator[int]) -> Iterator[None]: - random.seed(next(seed)) - yield - - -@pytest.fixture -def completion_usage() -> Dict[str, Any]: - prompt_tokens = random.randint(1, 1000) - completion_tokens = random.randint(1, 1000) - return { - "prompt_tokens": prompt_tokens, - "completion_tokens": completion_tokens, - "total_tokens": prompt_tokens + completion_tokens, - } - - -@pytest.fixture -def model_name() -> str: - return randstr() - - -def randstr() -> str: - return str(random.random()) - - -class MockByteStream(SyncByteStream, AsyncByteStream): - def __init__(self, byte_stream: Iterable[bytes]): - self._byte_stream = byte_stream - - def __iter__(self) -> Iterator[bytes]: - for byte_string in self._byte_stream: - yield byte_string - - async def __aiter__(self) -> AsyncIterator[bytes]: - for byte_string in self._byte_stream: - yield byte_string diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-langchain/tests/instrumentation/langchain/test_message_parsing.py b/instrumentation-loongsuite/loongsuite-instrumentation-langchain/tests/instrumentation/langchain/test_message_parsing.py deleted file mode 100644 index 946dd56fc..000000000 --- a/instrumentation-loongsuite/loongsuite-instrumentation-langchain/tests/instrumentation/langchain/test_message_parsing.py +++ /dev/null @@ -1,285 +0,0 @@ -# Copyright The OpenTelemetry Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import pytest -from langchain_core.messages import ( - AIMessage, - ChatMessage, - FunctionMessage, - HumanMessage, - SystemMessage, - ToolMessage, -) - -from opentelemetry.instrumentation.langchain.internal._tracer import ( - _input_messages, - _parse_message_data, -) - - -class MessageAttributes: - MESSAGE_ROLE = "role" - MESSAGE_CONTENT = "content" - MESSAGE_NAME = "name" - MESSAGE_FUNCTION_CALL_NAME = "function_call.name" - MESSAGE_FUNCTION_CALL_ARGUMENTS_JSON = "function_call.arguments" - MESSAGE_TOOL_CALLS = "tool_calls" - - -class ToolCallAttributes: - TOOL_CALL_FUNCTION_NAME = "tool_call.function.name" - TOOL_CALL_FUNCTION_ARGUMENTS_JSON = "tool_call.function.arguments" - - -class TestMessageParsing: - """测试消息解析功能""" - - @pytest.mark.parametrize( - "test_case", - [ - # 基础消息类型测试 - { - "name": "human_message_basic", - "message": HumanMessage(content="Hello, how are you?"), - "expected": { - MessageAttributes.MESSAGE_ROLE: "user", - MessageAttributes.MESSAGE_CONTENT: "Hello, how are you?", - }, - }, - { - "name": "ai_message_basic", - "message": AIMessage(content="I'm doing well, thank you!"), - "expected": { - MessageAttributes.MESSAGE_ROLE: "assistant", - MessageAttributes.MESSAGE_CONTENT: "I'm doing well, thank you!", - }, - }, - { - "name": "system_message_basic", - "message": SystemMessage( - content="You are a helpful assistant." 
- ), - "expected": { - MessageAttributes.MESSAGE_ROLE: "system", - MessageAttributes.MESSAGE_CONTENT: "You are a helpful assistant.", - }, - }, - { - "name": "function_message_basic", - "message": FunctionMessage( - content="The weather is sunny", name="get_weather" - ), - "expected": { - MessageAttributes.MESSAGE_ROLE: "function", - MessageAttributes.MESSAGE_CONTENT: "The weather is sunny", - MessageAttributes.MESSAGE_NAME: "get_weather", - }, - }, - { - "name": "tool_message_basic", - "message": ToolMessage( - content="Tool execution completed", - name="calculator", - tool_call_id="call_123", - ), - "expected": { - MessageAttributes.MESSAGE_ROLE: "tool", - MessageAttributes.MESSAGE_CONTENT: "Tool execution completed", - MessageAttributes.MESSAGE_NAME: "calculator", - }, - }, - { - "name": "chat_message_basic", - "message": ChatMessage(content="Hello from chat", role="user"), - "expected": { - MessageAttributes.MESSAGE_ROLE: "user", - MessageAttributes.MESSAGE_CONTENT: "Hello from chat", - }, - }, - # 函数调用测试 - { - "name": "ai_message_with_function_call", - "message": AIMessage( - content="I'll call a function to get weather information", - additional_kwargs={ - "function_call": { - "name": "get_weather", - "arguments": '{"location": "Beijing", "unit": "celsius"}', - } - }, - ), - "expected": { - MessageAttributes.MESSAGE_ROLE: "assistant", - MessageAttributes.MESSAGE_CONTENT: "I'll call a function to get weather information", - MessageAttributes.MESSAGE_FUNCTION_CALL_NAME: "get_weather", - MessageAttributes.MESSAGE_FUNCTION_CALL_ARGUMENTS_JSON: '{"location": "Beijing", "unit": "celsius"}', - }, - }, - # 工具调用测试 - { - "name": "ai_message_with_tool_calls", - "message": AIMessage( - content="I'll use a calculator tool", - additional_kwargs={ - "tool_calls": [ - { - "function": { - "name": "calculator", - "arguments": '{"operation": "add", "a": 1, "b": 2}', - } - } - ] - }, - ), - "expected": { - MessageAttributes.MESSAGE_ROLE: "assistant", - 
MessageAttributes.MESSAGE_CONTENT: "I'll use a calculator tool", - MessageAttributes.MESSAGE_TOOL_CALLS: [ - { - ToolCallAttributes.TOOL_CALL_FUNCTION_NAME: "calculator", - ToolCallAttributes.TOOL_CALL_FUNCTION_ARGUMENTS_JSON: '{"operation": "add", "a": 1, "b": 2}', - } - ], - }, - }, - # 多模态内容测试 - { - "name": "human_message_with_multimodal_content", - "message": HumanMessage( - content=[ - "Here's an image:", - { - "type": "image", - "url": "http://example.com/image.jpg", - }, - ] - ), - "expected": { - MessageAttributes.MESSAGE_ROLE: "user", - f"{MessageAttributes.MESSAGE_CONTENT}.0": "Here's an image:", - f"{MessageAttributes.MESSAGE_CONTENT}.1": "{'type': 'image', 'url': 'http://example.com/image.jpg'}", - }, - }, - # 边界情况测试 - { - "name": "message_with_non_string_name", - "message_data": { - "id": ["langchain", "core", "messages", "HumanMessage"], - "kwargs": { - "content": "Hello", - "name": 123, # 非字符串name - }, - }, - "expected": { - MessageAttributes.MESSAGE_ROLE: "user", - MessageAttributes.MESSAGE_CONTENT: "Hello", - }, - }, - { - "name": "message_with_non_string_content", - "message_data": { - "id": ["langchain", "core", "messages", "HumanMessage"], - "kwargs": { - "content": 123 # 非字符串content - }, - }, - "expected": {MessageAttributes.MESSAGE_ROLE: "user"}, - }, - ], - ) - def test_parse_message_data(self, test_case): - """测试消息数据解析""" - # 处理两种输入格式:LangChain消息对象或字典数据 - if "message" in test_case: - message_data = test_case["message"].to_json() - else: - message_data = test_case["message_data"] - - result = dict(_parse_message_data(message_data)) - - for key, expected_value in test_case["expected"].items(): - if key == MessageAttributes.MESSAGE_TOOL_CALLS: - tool_calls = result.get(key) - assert tool_calls is not None, ( - f"Expected tool_calls for {test_case['name']}" - ) - # 检查tool_calls列表中是否包含期望的函数名 - found = False - for tool_call in tool_calls: - if ( - tool_call.get( - ToolCallAttributes.TOOL_CALL_FUNCTION_NAME - ) - == "calculator" - ): - found = True 
- break - assert found, ( - f"Expected calculator function in tool_calls for {test_case['name']}" - ) - else: - actual_value = result.get(key) - assert actual_value == expected_value, ( - f"Expected {expected_value}, got {actual_value} for {test_case['name']}" - ) - - -class TestInputMessagesParsing: - """测试输入消息解析功能""" - - @pytest.mark.parametrize( - "test_case", - [ - # 基础消息列表测试 - { - "name": "basic_messages_list", - "input": { - "messages": [ - [ - HumanMessage(content="Hello"), - AIMessage(content="Hi there!"), - ] - ] - }, - "expected_count": 1, - }, - # 单个消息测试 - { - "name": "single_message", - "input": {"messages": HumanMessage(content="Single message")}, - "expected_count": 1, - }, - # 空输入测试 - { - "name": "empty_messages", - "input": {"messages": [[]]}, - "expected_count": 0, - }, - {"name": "none_input", "input": None, "expected_count": 0}, - { - "name": "invalid_message_types", - "input": { - "messages": [[123, "invalid", {"invalid": "format"}]], - "prompts": None, - }, - "expected_count": 1, - }, - ], - ) - def test_input_messages_parsing(self, test_case): - """测试输入消息解析""" - result = list(_input_messages(test_case["input"])) - assert len(result) == test_case["expected_count"], ( - f"Expected {test_case['expected_count']} results for {test_case['name']}" - ) diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-langchain/tests/instrumentation/langchain/test_metadata.py b/instrumentation-loongsuite/loongsuite-instrumentation-langchain/tests/instrumentation/langchain/test_metadata.py deleted file mode 100644 index 4a97d0b6e..000000000 --- a/instrumentation-loongsuite/loongsuite-instrumentation-langchain/tests/instrumentation/langchain/test_metadata.py +++ /dev/null @@ -1,318 +0,0 @@ -# Copyright The OpenTelemetry Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import json -from unittest.mock import Mock - -import pytest - -from opentelemetry.instrumentation.langchain.internal._tracer import _metadata - -LLM_SESSION_ID = "gen_ai.session.id" -LLM_USER_ID = "gen_ai.user.id" -METADATA = "metadata" - - -class TestMetadata: - """测试_metadata函数的功能""" - - @pytest.mark.parametrize( - "test_case", - [ - # 测试基本的metadata - 包含session_id和user_id - { - "name": "basic_metadata_with_session_and_user", - "run_extra": { - "metadata": { - "session_id": "session_123", - "user_id": "user_456", - "env": "production", - } - }, - "expected": [ - (LLM_SESSION_ID, "session_123"), - (LLM_USER_ID, "user_456"), - ( - METADATA, - json.dumps( - { - "session_id": "session_123", - "user_id": "user_456", - "env": "production", - } - ), - ), - ], - }, - # 测试使用conversation_id作为session_id - { - "name": "metadata_with_conversation_id", - "run_extra": { - "metadata": { - "conversation_id": "conv_789", - "user_id": "user_abc", - "timestamp": "2024-01-01T00:00:00Z", - } - }, - "expected": [ - (LLM_SESSION_ID, "conv_789"), - (LLM_USER_ID, "user_abc"), - ( - METADATA, - json.dumps( - { - "conversation_id": "conv_789", - "user_id": "user_abc", - "timestamp": "2024-01-01T00:00:00Z", - } - ), - ), - ], - }, - # 测试使用thread_id作为session_id - { - "name": "metadata_with_thread_id", - "run_extra": { - "metadata": { - "thread_id": "thread_xyz", - "user_id": "user_def", - "version": "1.0.0", - } - }, - "expected": [ - (LLM_SESSION_ID, "thread_xyz"), - (LLM_USER_ID, "user_def"), - ( - METADATA, - json.dumps( - { - "thread_id": "thread_xyz", - "user_id": "user_def", 
- "version": "1.0.0", - } - ), - ), - ], - }, - # 测试只有session_id,没有user_id - { - "name": "metadata_session_id_only", - "run_extra": { - "metadata": { - "session_id": "session_only", - "env": "development", - } - }, - "expected": [ - (LLM_SESSION_ID, "session_only"), - ( - METADATA, - json.dumps( - { - "session_id": "session_only", - "env": "development", - } - ), - ), - ], - }, - # 测试只有user_id,没有session_id - { - "name": "metadata_user_id_only", - "run_extra": { - "metadata": {"user_id": "user_only", "env": "test"} - }, - "expected": [ - (LLM_USER_ID, "user_only"), - ( - METADATA, - json.dumps({"user_id": "user_only", "env": "test"}), - ), - ], - }, - # 测试session_id优先级 - session_id > conversation_id > thread_id - { - "name": "session_id_priority", - "run_extra": { - "metadata": { - "session_id": "session_priority", - "conversation_id": "conv_ignored", - "thread_id": "thread_ignored", - "user_id": "user_priority", - } - }, - "expected": [ - (LLM_SESSION_ID, "session_priority"), - (LLM_USER_ID, "user_priority"), - ( - METADATA, - json.dumps( - { - "session_id": "session_priority", - "conversation_id": "conv_ignored", - "thread_id": "thread_ignored", - "user_id": "user_priority", - } - ), - ), - ], - }, - # 测试conversation_id优先级 - conversation_id > thread_id - { - "name": "conversation_id_priority", - "run_extra": { - "metadata": { - "conversation_id": "conv_priority", - "thread_id": "thread_ignored", - "user_id": "user_conv", - } - }, - "expected": [ - (LLM_SESSION_ID, "conv_priority"), - (LLM_USER_ID, "user_conv"), - ( - METADATA, - json.dumps( - { - "conversation_id": "conv_priority", - "thread_id": "thread_ignored", - "user_id": "user_conv", - } - ), - ), - ], - }, - # 测试包含特殊字符的metadata - { - "name": "metadata_with_special_characters", - "run_extra": { - "metadata": { - "session_id": "session-123_456", - "user_id": "user.abc@def", - "env": "test-env_1.0", - } - }, - "expected": [ - (LLM_SESSION_ID, "session-123_456"), - (LLM_USER_ID, "user.abc@def"), - ( - METADATA, 
- json.dumps( - { - "session_id": "session-123_456", - "user_id": "user.abc@def", - "env": "test-env_1.0", - } - ), - ), - ], - }, - # 测试包含中文的metadata - { - "name": "metadata_with_chinese", - "run_extra": { - "metadata": { - "session_id": "会话_123", - "user_id": "用户_456", - "env": "生产环境", - } - }, - "expected": [ - (LLM_SESSION_ID, "会话_123"), - (LLM_USER_ID, "用户_456"), - ( - METADATA, - json.dumps( - { - "session_id": "会话_123", - "user_id": "用户_456", - "env": "生产环境", - } - ), - ), - ], - }, - # 测试空metadata - { - "name": "empty_metadata", - "run_extra": {"metadata": {}}, - "expected": [], - }, - # 测试没有metadata键 - { - "name": "no_metadata_key", - "run_extra": {"other_key": "other_value"}, - "expected": [], - }, - # 测试None的run_extra - {"name": "none_run_extra", "run_extra": None, "expected": []}, - # 测试空run_extra - {"name": "empty_run_extra", "run_extra": {}, "expected": []}, - # 测试包含无效类型的metadata - { - "name": "invalid_metadata_type", - "run_extra": {"metadata": "not a dict"}, - "expected": [], - }, - # 测试包含Unicode字符的metadata - { - "name": "unicode_metadata", - "run_extra": { - "metadata": { - "session_id": "session_unicode_测试", - "user_id": "user_unicode_测试", - "message": "Hello 世界 🌍", - "emoji": "🚀✨🎉", - } - }, - "expected": [ - (LLM_SESSION_ID, "session_unicode_测试"), - (LLM_USER_ID, "user_unicode_测试"), - ( - METADATA, - json.dumps( - { - "session_id": "session_unicode_测试", - "user_id": "user_unicode_测试", - "message": "Hello 世界 🌍", - "emoji": "🚀✨🎉", - } - ), - ), - ], - }, - ], - ) - def test_metadata_parsing(self, test_case): - """测试metadata解析功能""" - # 创建模拟的Run对象 - mock_run = Mock() - mock_run.extra = test_case["run_extra"] - - result = list(_metadata(mock_run)) - - # 验证结果数量 - assert len(result) == len(test_case["expected"]), ( - f"Expected {len(test_case['expected'])} results, got {len(result)} for {test_case['name']}" - ) - - # 验证每个结果 - for i, (actual_key, actual_value) in enumerate(result): - expected_key, expected_value = test_case["expected"][i] - assert 
actual_key == expected_key, ( - f"Expected key {expected_key}, got {actual_key} for {test_case['name']}" - ) - assert actual_value == expected_value, ( - f"Expected value {expected_value}, got {actual_value} for {test_case['name']}" - ) diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-langchain/tests/instrumentation/langchain/test_prompts.py b/instrumentation-loongsuite/loongsuite-instrumentation-langchain/tests/instrumentation/langchain/test_prompts.py deleted file mode 100644 index 669387282..000000000 --- a/instrumentation-loongsuite/loongsuite-instrumentation-langchain/tests/instrumentation/langchain/test_prompts.py +++ /dev/null @@ -1,324 +0,0 @@ -# Copyright The OpenTelemetry Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import pytest - -from opentelemetry.instrumentation.langchain.internal._tracer import _prompts - - -class SpanAttributes: - GEN_AI_PROMPT = "gen_ai.prompt" - CONTENT = "content" - - -class MessageAttributes: - MESSAGE_ROLE = "role" - MESSAGE_CONTENT = "content" - - -class TestPrompts: - """测试_prompts函数的功能""" - - @pytest.mark.parametrize( - "test_case", - [ - # 测试dict类型的prompt - 包含role和text - { - "name": "dict_prompt_with_role_and_text", - "inputs": { - "prompts": [ - {"role": "user", "text": "Hello, how are you?"}, - { - "role": "assistant", - "text": "I'm doing well, thank you!", - }, - ] - }, - "expected": [ - ( - f"{SpanAttributes.GEN_AI_PROMPT}.0.{MessageAttributes.MESSAGE_ROLE}", - "user", - ), - ( - f"{SpanAttributes.GEN_AI_PROMPT}.0.{MessageAttributes.MESSAGE_CONTENT}", - "Hello, how are you?", - ), - ( - f"{SpanAttributes.GEN_AI_PROMPT}.1.{MessageAttributes.MESSAGE_ROLE}", - "assistant", - ), - ( - f"{SpanAttributes.GEN_AI_PROMPT}.1.{MessageAttributes.MESSAGE_CONTENT}", - "I'm doing well, thank you!", - ), - ], - }, - # 测试dict类型的prompt - 只包含role - { - "name": "dict_prompt_with_role_only", - "inputs": {"prompts": [{"role": "system"}]}, - "expected": [ - ( - f"{SpanAttributes.GEN_AI_PROMPT}.0.{MessageAttributes.MESSAGE_ROLE}", - "system", - ) - ], - }, - # 测试dict类型的prompt - 只包含text - { - "name": "dict_prompt_with_text_only", - "inputs": {"prompts": [{"text": "This is a system message"}]}, - "expected": [ - ( - f"{SpanAttributes.GEN_AI_PROMPT}.0.{MessageAttributes.MESSAGE_CONTENT}", - "This is a system message", - ) - ], - }, - # 测试dict类型的prompt - 空dict - { - "name": "dict_prompt_empty", - "inputs": {"prompts": [{}]}, - "expected": [], - }, - # 测试string类型的prompt - { - "name": "string_prompt_single", - "inputs": {"prompts": ["Hello, this is a simple prompt"]}, - "expected": [ - ( - f"{SpanAttributes.GEN_AI_PROMPT}.0.{SpanAttributes.CONTENT}", - "Hello, this is a simple prompt", - ) - ], - }, - # 测试string类型的prompt - 多个 - { - "name": "string_prompt_multiple", 
- "inputs": { - "prompts": [ - "First prompt", - "Second prompt", - "Third prompt", - ] - }, - "expected": [ - ( - f"{SpanAttributes.GEN_AI_PROMPT}.0.{SpanAttributes.CONTENT}", - "First prompt", - ), - ( - f"{SpanAttributes.GEN_AI_PROMPT}.1.{SpanAttributes.CONTENT}", - "Second prompt", - ), - ( - f"{SpanAttributes.GEN_AI_PROMPT}.2.{SpanAttributes.CONTENT}", - "Third prompt", - ), - ], - }, - # 测试混合类型 - dict和string - { - "name": "mixed_prompt_types", - "inputs": { - "prompts": [ - {"role": "user", "text": "User message"}, - "Simple string prompt", - {"role": "assistant", "text": "Assistant response"}, - ] - }, - "expected": [ - ( - f"{SpanAttributes.GEN_AI_PROMPT}.0.{MessageAttributes.MESSAGE_ROLE}", - "user", - ), - ( - f"{SpanAttributes.GEN_AI_PROMPT}.0.{MessageAttributes.MESSAGE_CONTENT}", - "User message", - ), - ( - f"{SpanAttributes.GEN_AI_PROMPT}.1.{SpanAttributes.CONTENT}", - "Simple string prompt", - ), - ( - f"{SpanAttributes.GEN_AI_PROMPT}.2.{MessageAttributes.MESSAGE_ROLE}", - "assistant", - ), - ( - f"{SpanAttributes.GEN_AI_PROMPT}.2.{MessageAttributes.MESSAGE_CONTENT}", - "Assistant response", - ), - ], - }, - # 测试边界情况 - 空prompts列表 - { - "name": "empty_prompts_list", - "inputs": {"prompts": []}, - "expected": [], - }, - # 测试边界情况 - 没有prompts键 - { - "name": "no_prompts_key", - "inputs": {"messages": ["some message"]}, - "expected": [], - }, - # 测试边界情况 - None输入 - {"name": "none_input", "inputs": None, "expected": []}, - # 测试边界情况 - 空dict输入 - {"name": "empty_dict_input", "inputs": {}, "expected": []}, - # 测试边界情况 - 非Mapping类型输入 - { - "name": "non_mapping_input", - "inputs": "not a dict", - "expected": [], - }, - ], - ) - def test_prompts_parsing(self, test_case): - """测试prompts解析功能""" - result = list(_prompts(test_case["inputs"])) - - # 验证结果数量 - assert len(result) == len(test_case["expected"]), ( - f"Expected {len(test_case['expected'])} results, got {len(result)} for {test_case['name']}" - ) - - # 验证每个结果 - for i, (actual_key, actual_value) in 
enumerate(result): - expected_key, expected_value = test_case["expected"][i] - assert actual_key == expected_key, ( - f"Expected key {expected_key}, got {actual_key} for {test_case['name']}" - ) - assert actual_value == expected_value, ( - f"Expected value {expected_value}, got {actual_value} for {test_case['name']}" - ) - - def test_prompts_with_invalid_prompt_types(self): - """测试包含无效prompt类型的情况""" - inputs = { - "prompts": [ - {"role": "user", "text": "Valid dict prompt"}, - 123, # 无效类型 - "Valid string prompt", - None, # 无效类型 - {"role": "assistant", "text": "Another valid dict prompt"}, - ] - } - - result = list(_prompts(inputs)) - - # 应该只处理有效的prompt类型 - expected = [ - ( - f"{SpanAttributes.GEN_AI_PROMPT}.0.{MessageAttributes.MESSAGE_ROLE}", - "user", - ), - ( - f"{SpanAttributes.GEN_AI_PROMPT}.0.{MessageAttributes.MESSAGE_CONTENT}", - "Valid dict prompt", - ), - ] - - # 验证expected中的key和value是否在result中 - for key, value in expected: - assert (key, value) in result - - def test_prompts_with_complex_dict_structure(self): - """测试复杂的dict结构""" - inputs = { - "prompts": [ - { - "role": "user", - "text": "Complex user message", - "additional_info": "This should be ignored", - }, - { - "role": "system", - "text": "System configuration", - "metadata": {"version": "1.0"}, - }, - ] - } - - result = list(_prompts(inputs)) - - # 只应该提取role和text字段 - expected = [ - ( - f"{SpanAttributes.GEN_AI_PROMPT}.0.{MessageAttributes.MESSAGE_ROLE}", - "user", - ), - ( - f"{SpanAttributes.GEN_AI_PROMPT}.0.{MessageAttributes.MESSAGE_CONTENT}", - "Complex user message", - ), - ( - f"{SpanAttributes.GEN_AI_PROMPT}.1.{MessageAttributes.MESSAGE_ROLE}", - "system", - ), - ( - f"{SpanAttributes.GEN_AI_PROMPT}.1.{MessageAttributes.MESSAGE_CONTENT}", - "System configuration", - ), - ] - - assert len(result) == len(expected) - for i, (actual_key, actual_value) in enumerate(result): - expected_key, expected_value = expected[i] - assert actual_key == expected_key - assert actual_value == expected_value - 
- def test_prompts_with_special_characters(self): - """测试包含特殊字符的prompt""" - inputs = { - "prompts": [ - {"role": "user", "text": "Hello\nWorld\tTab"}, - "String with \"quotes\" and 'apostrophes'", - { - "role": "assistant", - "text": "Response with unicode: 你好世界", - }, - ] - } - - result = list(_prompts(inputs)) - - expected = [ - ( - f"{SpanAttributes.GEN_AI_PROMPT}.0.{MessageAttributes.MESSAGE_ROLE}", - "user", - ), - ( - f"{SpanAttributes.GEN_AI_PROMPT}.0.{MessageAttributes.MESSAGE_CONTENT}", - "Hello\nWorld\tTab", - ), - ( - f"{SpanAttributes.GEN_AI_PROMPT}.1.{SpanAttributes.CONTENT}", - "String with \"quotes\" and 'apostrophes'", - ), - ( - f"{SpanAttributes.GEN_AI_PROMPT}.2.{MessageAttributes.MESSAGE_ROLE}", - "assistant", - ), - ( - f"{SpanAttributes.GEN_AI_PROMPT}.2.{MessageAttributes.MESSAGE_CONTENT}", - "Response with unicode: 你好世界", - ), - ] - - assert len(result) == len(expected) - for i, (actual_key, actual_value) in enumerate(result): - expected_key, expected_value = expected[i] - assert actual_key == expected_key - assert actual_value == expected_value diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-langchain/tests/instrumentation/langchain/test_singleton_tracer.py b/instrumentation-loongsuite/loongsuite-instrumentation-langchain/tests/instrumentation/langchain/test_singleton_tracer.py deleted file mode 100644 index 1b6261067..000000000 --- a/instrumentation-loongsuite/loongsuite-instrumentation-langchain/tests/instrumentation/langchain/test_singleton_tracer.py +++ /dev/null @@ -1,207 +0,0 @@ -# Copyright The OpenTelemetry Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -Test the singleton behavior of the LangChain tracer to ensure trace continuity. -Verify that modifications to _BaseCallbackManagerInit do not break parent-child relationships when new instances are created. -""" - -import uuid -from typing import Generator -from unittest.mock import Mock, patch - -import pytest -from langchain_core.callbacks.manager import BaseCallbackManager -from langchain_core.tracers.schemas import Run - -from opentelemetry import trace as trace_api -from opentelemetry.instrumentation.langchain import ( - LangChainInstrumentor, - _BaseCallbackManagerInit, -) -from opentelemetry.instrumentation.langchain.internal._tracer import ( - LoongsuiteTracer, -) -from opentelemetry.sdk import trace as trace_sdk -from opentelemetry.sdk.metrics import MeterProvider -from opentelemetry.sdk.metrics.export import InMemoryMetricReader -from opentelemetry.sdk.trace.export import SimpleSpanProcessor -from opentelemetry.sdk.trace.export.in_memory_span_exporter import ( - InMemorySpanExporter, -) - - -@pytest.fixture(scope="module") -def singleton_test_span_exporter() -> InMemorySpanExporter: - return InMemorySpanExporter() - - -@pytest.fixture(scope="module") -def singleton_test_metric_reader() -> InMemoryMetricReader: - return InMemoryMetricReader() - - -@pytest.fixture(scope="module") -def singleton_test_tracer_provider( - singleton_test_span_exporter: InMemorySpanExporter, -) -> trace_api.TracerProvider: - tracer_provider = trace_sdk.TracerProvider() - tracer_provider.add_span_processor( - SimpleSpanProcessor(singleton_test_span_exporter) - ) - 
return tracer_provider - - -@pytest.fixture(scope="module") -def singleton_test_meter_provider( - singleton_test_metric_reader: InMemoryMetricReader, -) -> MeterProvider: - meter_provider = MeterProvider( - metric_readers=[singleton_test_metric_reader] - ) - return meter_provider - - -@pytest.fixture(autouse=True) -def singleton_test_instrument( - singleton_test_tracer_provider: trace_api.TracerProvider, - singleton_test_span_exporter: InMemorySpanExporter, - singleton_test_meter_provider: MeterProvider, - singleton_test_metric_reader: InMemoryMetricReader, -) -> Generator[None, None, None]: - LangChainInstrumentor().instrument( - tracer_provider=singleton_test_tracer_provider, - meter_provider=singleton_test_meter_provider, - ) - yield - LangChainInstrumentor().uninstrument() - singleton_test_span_exporter.clear() - singleton_test_metric_reader.force_flush() - - -class TestSingletonTracer: - """测试_BaseCallbackManagerInit单例行为和链路连续性""" - - def test_parent_context_resolution_core_logic( - self, - singleton_test_tracer_provider: trace_api.TracerProvider, - singleton_test_meter_provider: MeterProvider, - ): - """ - 模拟BaseCallbackManager与BaseCallbackHandler重新构建以验证链路连续性 - """ - tracer = singleton_test_tracer_provider.get_tracer("test") - meter = singleton_test_meter_provider.get_meter("test") - - callback_init = _BaseCallbackManagerInit( - tracer=tracer, meter=meter, cls=LoongsuiteTracer - ) - - # 创建测试用的Run数据 - parent_run_id = uuid.uuid4() - child_run_id = uuid.uuid4() - - from datetime import datetime # noqa: PLC0415 - - parent_run = Run( - id=parent_run_id, - name="parent", - run_type="chain", - inputs={}, - start_time=datetime.now(), - ) - - child_run = Run( - id=child_run_id, - name="child", - run_type="llm", - inputs={}, - start_time=datetime.now(), - parent_run_id=parent_run_id, - ) - - # 模拟真实场景:两次callback_init调用,测试单例vs非单例的区别 - # 第一次调用:添加第一个handler并处理parent run - first_manager = BaseCallbackManager(handlers=[]) - callback_init(lambda: None, first_manager, (), {}) - 
- # 获取第一个handler - first_handler = None - for handler in first_manager.handlers: - if isinstance(handler, LoongsuiteTracer): - first_handler = handler - break - - assert first_handler is not None, "第一次调用应该创建handler" - - with patch.object(first_handler, "_tracer") as mock_tracer: - # 设置mock返回值 - mock_parent_span = Mock() - mock_tracer.start_span.return_value = mock_parent_span - - # 第一个handler处理parent run - first_handler._start_trace(parent_run) - - # 验证parent run被保存 - assert parent_run_id in first_handler._runs, ( - "parent run应该被保存到第一个handler的_runs" - ) - - # 第二次调用:关键测试点 - 验证单例行为 - second_manager = BaseCallbackManager(handlers=[]) - callback_init(lambda: None, second_manager, (), {}) - - # 获取第二个handler - second_handler = None - for handler in second_manager.handlers: - if isinstance(handler, LoongsuiteTracer): - second_handler = handler - break - - assert second_handler is not None, "第二次调用应该创建/返回handler" - - # 关键验证:第二个handler能否看到parent run - parent_in_second_handler = second_handler._runs.get(parent_run_id) - assert parent_in_second_handler is not None, ( - "第二个handler应该能看到parent run(链路连续)" - ) - - # 模拟第二个handler处理child run - with patch.object(second_handler, "_tracer") as mock_tracer: - mock_child_span = Mock() - mock_tracer.start_span.return_value = mock_child_span - - # 在处理child run之前,验证能否找到parent context - with second_handler._lock: - parent_context_found = ( - parent.context - if (parent_run_id := child_run.parent_run_id) - and (parent := second_handler._runs.get(parent_run_id)) - else None - ) - - assert parent_context_found is not None, ( - "child run应该能找到parent context(链路连续)" - ) - - # 处理child run - second_handler._start_trace(child_run) - - # 验证child run被保存 - assert child_run_id in second_handler._runs, "child run应该被保存" - - # 验证两个run都存在 - assert len(second_handler._runs) == 2, "应该有2个run被保存" - assert parent_run_id in second_handler._runs, "parent run应该存在" - assert child_run_id in second_handler._runs, "child run应该存在" diff --git 
a/instrumentation-loongsuite/loongsuite-instrumentation-langchain/tests/instrumentation/langchain/test_token_counts.py b/instrumentation-loongsuite/loongsuite-instrumentation-langchain/tests/instrumentation/langchain/test_token_counts.py deleted file mode 100644 index f763543cd..000000000 --- a/instrumentation-loongsuite/loongsuite-instrumentation-langchain/tests/instrumentation/langchain/test_token_counts.py +++ /dev/null @@ -1,156 +0,0 @@ -# Copyright The OpenTelemetry Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import random - -import pytest -from langchain_core.messages import AIMessage - -# Added import of Generation and AIMessage from langchain_core -from langchain_core.outputs import ChatGeneration, Generation - -from opentelemetry.instrumentation.langchain.internal._tracer import ( - _token_counts, -) - -LLM_USAGE_PROMPT_TOKENS = "gen_ai.usage.prompt_tokens" -LLM_USAGE_COMPLETION_TOKENS = "gen_ai.usage.completion_tokens" -LLM_USAGE_TOTAL_TOKENS = "gen_ai.usage.total_tokens" - - -def random_token_triplet(): - prompt_tokens = random.randint(1, 1000) - output_tokens = random.randint(1, 1000) - total_tokens = prompt_tokens + output_tokens - return prompt_tokens, output_tokens, total_tokens - - -def case_generation_info_token_usage(): - # generations[0][0].generation_info.token_usage - prompt_tokens, output_tokens, total_tokens = random_token_triplet() - # 用 Generation 类 - gen = Generation( - text="hello", - generation_info={ - "token_usage": { - "input_tokens": prompt_tokens, - "output_tokens": output_tokens, - "total_tokens": total_tokens, - } - }, - ) - outputs = {"generations": [[gen]]} - return outputs, prompt_tokens, output_tokens, total_tokens - - -def case_message_response_metadata_token_usage(): - # generations[0][0].message.response_metadata.token_usage - prompt_tokens, output_tokens, total_tokens = random_token_triplet() - # 用 Generation + AIMessage - ai_msg = AIMessage( - content="hi", - response_metadata={ - "token_usage": { - "completion_tokens": output_tokens, - "prompt_tokens": prompt_tokens, - "total_tokens": total_tokens, - } - }, - ) - gen = ChatGeneration(text="hello", message=ai_msg) - outputs = {"generations": [[gen]]} - return outputs, prompt_tokens, output_tokens, total_tokens - - -def case_message_response_metadata_token_usage_uppercase(): - # generations[0][0].message.response_metadata.token_usage (大写key) - prompt_tokens, output_tokens, total_tokens = random_token_triplet() - ai_msg = AIMessage( - content="hi", - response_metadata={ - 
"token_usage": { - "PromptTokens": prompt_tokens, - "CompletionTokens": output_tokens, - "TotalTokens": total_tokens, - } - }, - ) - gen = ChatGeneration(text="hello", message=ai_msg) - outputs = {"generations": [[gen]]} - return outputs, prompt_tokens, output_tokens, total_tokens - - -def case_llm_output_token_usage_priority(): - # llm_output.token_usage 优先,generations 也有 token_usage,但只取 llm_output - prompt_tokens, output_tokens, total_tokens = random_token_triplet() - ai_msg = AIMessage(content="hi") - gen = ChatGeneration( - text="hello", - message=ai_msg, # 这里必须传 message - generation_info={ - "token_usage": { - "PromptTokens": prompt_tokens, - "CompletionTokens": output_tokens, - "TotalTokens": total_tokens, - } - }, - ) - outputs = { - "generations": [[gen]], - "llm_output": { - "token_usage": { - "completion_tokens": output_tokens, - "prompt_tokens": prompt_tokens, - "total_tokens": total_tokens, - } - }, - } - return outputs, prompt_tokens, output_tokens, total_tokens - - -def case_llm_output_empty_should_fallback(): - # llm_output存在但无token_usage,应该跳过,继续找generations - prompt_tokens, output_tokens, total_tokens = random_token_triplet() - ai_msg = AIMessage( - content="hi", - response_metadata={ - "token_usage": { - "completion_tokens": output_tokens, - "prompt_tokens": prompt_tokens, - "total_tokens": total_tokens, - } - }, - ) - gen = ChatGeneration(text="hello", message=ai_msg) - outputs = {"generations": [[gen]], "llm_output": {}} - return outputs, prompt_tokens, output_tokens, total_tokens - - -@pytest.mark.parametrize( - "build_case", - [ - case_generation_info_token_usage, - case_message_response_metadata_token_usage, - case_message_response_metadata_token_usage_uppercase, - case_llm_output_token_usage_priority, - case_llm_output_empty_should_fallback, - ], -) -def test_token_counts_real_formats_minimal(build_case): - outputs, prompt_tokens, output_tokens, total_tokens = build_case() - result = dict(_token_counts(outputs)) - - assert 
result[LLM_USAGE_PROMPT_TOKENS] == prompt_tokens - assert result[LLM_USAGE_COMPLETION_TOKENS] == output_tokens - assert result[LLM_USAGE_TOTAL_TOKENS] == total_tokens diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-langchain/tests/requirements.latest.txt b/instrumentation-loongsuite/loongsuite-instrumentation-langchain/tests/requirements.latest.txt new file mode 100644 index 000000000..c775af1a9 --- /dev/null +++ b/instrumentation-loongsuite/loongsuite-instrumentation-langchain/tests/requirements.latest.txt @@ -0,0 +1,49 @@ +# Copyright The OpenTelemetry Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# ******************************** +# WARNING: NOT HERMETIC !!!!!!!!!! +# ******************************** +# +# This "requirements.txt" is installed in conjunction +# with multiple other dependencies in the top-level "tox-loongsuite.ini" +# file. In particular, please see: +# +# loongsuite-langchain: {[testenv]test_deps} +# loongsuite-langchain: -r {toxinidir}/instrumentation-loongsuite/loongsuite-instrumentation-langchain/tests/requirements.latest.txt +# +# This provides additional dependencies, namely: +# +# opentelemetry-api +# opentelemetry-sdk +# opentelemetry-semantic-conventions +# +# ... with a "dev" version based on the latest distribution. + +# This variant of the requirements aims to test the system using +# the newest supported version of external dependencies. 
+ +langchain_core<1.0.0 +langchain<1.0.0 +langchain-community<1.0.0 +langchain-openai>=0.2.0 +pytest +wrapt +respx>=0.20.0 +httpx>=0.24.0 +numpy>=1.20.0 + +-e opentelemetry-instrumentation +-e util/opentelemetry-util-genai +-e instrumentation-loongsuite/loongsuite-instrumentation-langchain diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-langchain/tests/requirements.oldest.txt b/instrumentation-loongsuite/loongsuite-instrumentation-langchain/tests/requirements.oldest.txt new file mode 100644 index 000000000..ff0cfa6d1 --- /dev/null +++ b/instrumentation-loongsuite/loongsuite-instrumentation-langchain/tests/requirements.oldest.txt @@ -0,0 +1,33 @@ +# Copyright The OpenTelemetry Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This variant of the requirements aims to test the system using +# the oldest supported version of external dependencies. 
+ +langchain_core<1.0.0 +langchain<1.0.0 +langchain-community<1.0.0 +langchain-openai>=0.2.0 +pytest +wrapt +respx>=0.20.0 +httpx>=0.24.0 +numpy>=1.20.0 +opentelemetry-api==1.37 +opentelemetry-sdk==1.37 +opentelemetry-instrumentation==0.58b0 +opentelemetry-semantic-conventions==0.58b0 + +-e util/opentelemetry-util-genai +-e instrumentation-loongsuite/loongsuite-instrumentation-langchain diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-langchain/tests/test_agent_spans.py b/instrumentation-loongsuite/loongsuite-instrumentation-langchain/tests/test_agent_spans.py new file mode 100644 index 000000000..00da288db --- /dev/null +++ b/instrumentation-loongsuite/loongsuite-instrumentation-langchain/tests/test_agent_spans.py @@ -0,0 +1,53 @@ +# Copyright The OpenTelemetry Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Tests for Agent span creation — verifying AGENT_RUN_NAMES detection.""" + +from opentelemetry.instrumentation.langchain.internal._utils import ( + AGENT_RUN_NAMES, + _is_agent_run, +) + + +class _FakeRun: + """Minimal stub that looks like a langchain Run for unit tests.""" + + def __init__(self, name: str): + self.name = name + + +class TestAgentDetection: + def test_agent_executor_detected(self): + assert _is_agent_run(_FakeRun("AgentExecutor")) + + def test_mrkl_chain_detected(self): + assert _is_agent_run(_FakeRun("MRKLChain")) + + def test_react_chain_detected(self): + assert _is_agent_run(_FakeRun("ReActChain")) + + def test_self_ask_chain_detected(self): + assert _is_agent_run(_FakeRun("SelfAskWithSearchChain")) + + def test_regular_chain_not_detected(self): + assert not _is_agent_run(_FakeRun("RunnableSequence")) + + def test_empty_name_not_detected(self): + assert not _is_agent_run(_FakeRun("")) + + def test_none_name_not_detected(self): + assert not _is_agent_run(_FakeRun(None)) + + def test_agent_run_names_immutable(self): + assert isinstance(AGENT_RUN_NAMES, frozenset) diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-langchain/tests/test_basic.py b/instrumentation-loongsuite/loongsuite-instrumentation-langchain/tests/test_basic.py new file mode 100644 index 000000000..f1e57a4eb --- /dev/null +++ b/instrumentation-loongsuite/loongsuite-instrumentation-langchain/tests/test_basic.py @@ -0,0 +1,114 @@ +# Copyright The OpenTelemetry Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +"""Basic integration tests for LangChain Instrumentation. + +Verifies: +- Tracer is injected into CallbackManager +- Sync/async chain calls produce spans +- Error chains produce error spans +""" + +import asyncio + +import pytest +from langchain_core.callbacks.manager import BaseCallbackManager +from langchain_core.runnables import RunnableLambda + +from opentelemetry.instrumentation.langchain.internal._tracer import ( + LoongsuiteTracer, +) + + +class TestTracerInjection: + def test_tracer_injected(self, instrument): + manager = BaseCallbackManager(handlers=[]) + has_tracer = any( + isinstance(h, LoongsuiteTracer) + for h in manager.inheritable_handlers + ) + assert has_tracer + + def test_singleton(self, instrument): + m1 = BaseCallbackManager(handlers=[]) + m2 = BaseCallbackManager(handlers=[]) + t1 = next( + h + for h in m1.inheritable_handlers + if isinstance(h, LoongsuiteTracer) + ) + t2 = next( + h + for h in m2.inheritable_handlers + if isinstance(h, LoongsuiteTracer) + ) + assert t1 is t2 + + def test_not_duplicated(self, instrument): + m = BaseCallbackManager(handlers=[]) + count = sum( + 1 + for h in m.inheritable_handlers + if isinstance(h, LoongsuiteTracer) + ) + assert count == 1 + + +class TestSyncChainSpans: + def test_simple_chain(self, instrument, span_exporter): + chain = RunnableLambda(lambda x: f"out({x})") + result = chain.invoke("hello") + assert result == "out(hello)" + spans = span_exporter.get_finished_spans() + assert len(spans) >= 1 + chain_spans = [s for s in spans if s.name.startswith("chain ")] + assert len(chain_spans) >= 1 + + def test_multi_step_chain(self, instrument, span_exporter): + chain = RunnableLambda(lambda x: f"a({x})") | RunnableLambda( + lambda x: f"b({x})" + ) + result = chain.invoke("hi") + assert result == "b(a(hi))" + spans = span_exporter.get_finished_spans() + assert len(spans) >= 2 + + def 
test_chain_error(self, instrument, span_exporter): + def fail(x): + raise ValueError("boom") + + with pytest.raises(ValueError, match="boom"): + RunnableLambda(fail).invoke("x") + spans = span_exporter.get_finished_spans() + assert len(spans) >= 1 + + +class TestAsyncChainSpans: + def test_async_chain(self, instrument, span_exporter): + async def fn(x): + return f"async({x})" + + result = asyncio.run(RunnableLambda(fn).ainvoke("val")) + assert result == "async(val)" + spans = span_exporter.get_finished_spans() + assert len(spans) >= 1 + + def test_async_chain_error(self, instrument, span_exporter): + async def fail(x): + raise ValueError("async boom") + + with pytest.raises(ValueError, match="async boom"): + asyncio.run(RunnableLambda(fail).ainvoke("x")) + spans = span_exporter.get_finished_spans() + assert len(spans) >= 1 diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-langchain/tests/test_chain_spans.py b/instrumentation-loongsuite/loongsuite-instrumentation-langchain/tests/test_chain_spans.py new file mode 100644 index 000000000..14c49b025 --- /dev/null +++ b/instrumentation-loongsuite/loongsuite-instrumentation-langchain/tests/test_chain_spans.py @@ -0,0 +1,172 @@ +# Copyright The OpenTelemetry Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Tests for Chain span creation and attributes.""" + +import json + +import pytest +from langchain_core.runnables import RunnableLambda + +from opentelemetry.instrumentation.langchain.internal.semconv import ( + GEN_AI_OPERATION_NAME, + GEN_AI_SPAN_KIND, + INPUT_VALUE, + OUTPUT_VALUE, +) +from opentelemetry.trace import StatusCode + + +def _find_chain_spans(span_exporter): + spans = span_exporter.get_finished_spans() + return [s for s in spans if s.name.startswith("chain ")] + + +class TestChainSpanCreation: + def test_chain_creates_span(self, instrument, span_exporter): + chain = RunnableLambda(lambda x: f"result({x})") + result = chain.invoke("input") + assert result == "result(input)" + + chain_spans = _find_chain_spans(span_exporter) + assert len(chain_spans) >= 1 + + def test_chain_span_has_input_output(self, instrument, span_exporter): + chain = RunnableLambda(lambda x: f"out({x})") + chain.invoke("test_input") + + chain_spans = _find_chain_spans(span_exporter) + assert len(chain_spans) >= 1 + + attrs = dict(chain_spans[0].attributes) + assert INPUT_VALUE in attrs + assert OUTPUT_VALUE in attrs + + def test_chain_span_kind_attribute(self, instrument, span_exporter): + chain = RunnableLambda(lambda x: x) + chain.invoke("test") + + chain_spans = _find_chain_spans(span_exporter) + assert len(chain_spans) >= 1 + attrs = dict(chain_spans[0].attributes) + assert attrs.get(GEN_AI_SPAN_KIND) == "CHAIN" + + def test_chain_span_operation_name(self, instrument, span_exporter): + """Chain spans must have gen_ai.operation.name=chain.""" + chain = RunnableLambda(lambda x: x) + chain.invoke("test") + + chain_spans = _find_chain_spans(span_exporter) + assert len(chain_spans) >= 1 + attrs = dict(chain_spans[0].attributes) + assert attrs.get(GEN_AI_OPERATION_NAME) == "chain" + + +class TestChainInputOutputContent: + """Verify actual input/output values in chain span attributes.""" + + def test_input_value_contains_data(self, instrument, span_exporter): + chain = 
RunnableLambda(lambda x: f"result({x})") + chain.invoke("hello_chain") + + chain_spans = _find_chain_spans(span_exporter) + assert len(chain_spans) >= 1 + attrs = dict(chain_spans[0].attributes) + + input_val = attrs.get(INPUT_VALUE, "") + assert "hello_chain" in input_val, ( + f"Expected 'hello_chain' in input.value, got: {input_val}" + ) + + def test_output_value_contains_data(self, instrument, span_exporter): + chain = RunnableLambda(lambda x: f"processed({x})") + chain.invoke("data") + + chain_spans = _find_chain_spans(span_exporter) + assert len(chain_spans) >= 1 + attrs = dict(chain_spans[0].attributes) + + output_val = attrs.get(OUTPUT_VALUE, "") + assert "processed(data)" in output_val, ( + f"Expected 'processed(data)' in output.value, got: {output_val}" + ) + + def test_dict_input_serialized(self, instrument, span_exporter): + """Verify dict inputs are JSON-serialised in input.value.""" + chain = RunnableLambda(lambda x: x.get("msg", "")) + chain.invoke({"msg": "payload", "key": 42}) + + chain_spans = _find_chain_spans(span_exporter) + assert len(chain_spans) >= 1 + attrs = dict(chain_spans[0].attributes) + + input_val = attrs.get(INPUT_VALUE, "") + parsed = json.loads(input_val) + assert parsed.get("msg") == "payload" + assert parsed.get("key") == 42 + + def test_no_content_when_disabled( + self, instrument_no_content, span_exporter + ): + """Chain input/output should NOT be recorded when content capture is off.""" + chain = RunnableLambda(lambda x: f"result({x})") + chain.invoke("secret_data") + + chain_spans = _find_chain_spans(span_exporter) + assert len(chain_spans) >= 1 + attrs = dict(chain_spans[0].attributes) + assert INPUT_VALUE not in attrs + assert OUTPUT_VALUE not in attrs + + +class TestChainComposition: + def test_multi_step_chain(self, instrument, span_exporter): + chain = RunnableLambda(lambda x: f"a({x})") | RunnableLambda( + lambda x: f"b({x})" + ) + result = chain.invoke("in") + assert result == "b(a(in))" + + chain_spans = 
_find_chain_spans(span_exporter) + assert len(chain_spans) >= 2 + + def test_multi_step_chain_data_flows(self, instrument, span_exporter): + """Verify intermediate data flows through chain spans.""" + chain = RunnableLambda(lambda x: f"step1({x})") | RunnableLambda( + lambda x: f"step2({x})" + ) + chain.invoke("start") + + chain_spans = _find_chain_spans(span_exporter) + all_outputs = [ + dict(s.attributes).get(OUTPUT_VALUE, "") for s in chain_spans + ] + has_step1_output = any("step1(start)" in o for o in all_outputs) + has_step2_output = any("step2(step1(start))" in o for o in all_outputs) + assert has_step1_output, f"step1 output not found in: {all_outputs}" + assert has_step2_output, f"step2 output not found in: {all_outputs}" + + +class TestChainError: + def test_error_chain_produces_error_span(self, instrument, span_exporter): + def fail(x): + raise ValueError("chain failure") + + with pytest.raises(ValueError, match="chain failure"): + RunnableLambda(fail).invoke("x") + + chain_spans = _find_chain_spans(span_exporter) + assert len(chain_spans) >= 1 + error_span = chain_spans[0] + assert error_span.status.status_code == StatusCode.ERROR diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-langchain/tests/test_data_extraction.py b/instrumentation-loongsuite/loongsuite-instrumentation-langchain/tests/test_data_extraction.py new file mode 100644 index 000000000..eab686481 --- /dev/null +++ b/instrumentation-loongsuite/loongsuite-instrumentation-langchain/tests/test_data_extraction.py @@ -0,0 +1,507 @@ +# Copyright The OpenTelemetry Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Unit tests for data extraction helper functions in _utils.py.""" + +from opentelemetry.instrumentation.langchain.internal._utils import ( + _convert_lc_message_to_input, + _extract_finish_reasons, + _extract_llm_input_messages, + _extract_llm_output_messages, + _extract_model_name, + _extract_provider, + _extract_response_model, + _extract_token_usage, + _extract_tool_definitions, + _safe_json, +) +from opentelemetry.util.genai.types import ( + FunctionToolDefinition, + Text, + ToolCall, + ToolCallResponse, +) + + +class _FakeRun: + """Minimal stub mimicking a LangChain Run object.""" + + def __init__(self, **kwargs): + self.name = kwargs.get("name", "test") + self.inputs = kwargs.get("inputs", {}) + self.outputs = kwargs.get("outputs", {}) + self.extra = kwargs.get("extra", {}) + self.serialized = kwargs.get("serialized", {}) + self.error = kwargs.get("error", None) + + +class TestExtractModelName: + def test_from_invocation_params_model_name(self): + run = _FakeRun(extra={"invocation_params": {"model_name": "gpt-4"}}) + assert _extract_model_name(run) == "gpt-4" + + def test_from_invocation_params_model(self): + run = _FakeRun(extra={"invocation_params": {"model": "claude-3"}}) + assert _extract_model_name(run) == "claude-3" + + def test_none_when_missing(self): + run = _FakeRun() + assert _extract_model_name(run) is None + + +class TestExtractProvider: + def test_from_serialized_id(self): + run = _FakeRun( + serialized={"id": ["langchain", "llms", "openai", "ChatOpenAI"]} + ) + assert _extract_provider(run) == "openai" + + def 
test_default_langchain(self): + run = _FakeRun() + assert _extract_provider(run) == "langchain" + + +class TestConvertMessage: + def test_human_message(self): + msg = { + "id": ["langchain", "schema", "HumanMessage"], + "kwargs": {"content": "Hello"}, + } + result = _convert_lc_message_to_input(msg) + assert result is not None + assert result.role == "user" + assert len(result.parts) == 1 + assert isinstance(result.parts[0], Text) + assert result.parts[0].content == "Hello" + + def test_ai_message(self): + msg = { + "id": ["langchain", "schema", "AIMessage"], + "kwargs": {"content": "Hi there"}, + } + result = _convert_lc_message_to_input(msg) + assert result is not None + assert result.role == "assistant" + + def test_message_with_tool_calls(self): + msg = { + "id": ["langchain", "schema", "AIMessage"], + "kwargs": { + "content": "", + "tool_calls": [ + {"name": "search", "args": {"q": "test"}, "id": "tc1"} + ], + }, + } + result = _convert_lc_message_to_input(msg) + assert result is not None + assert any(isinstance(p, ToolCall) for p in result.parts) + + def test_tool_message(self): + """ToolMessage (role=tool) should be converted to ToolCallResponse, not Text.""" + msg = { + "id": ["langchain", "schema", "ToolMessage"], + "kwargs": { + "content": "search result: 42", + "tool_call_id": "call_abc123", + }, + } + result = _convert_lc_message_to_input(msg) + assert result is not None + assert result.role == "tool" + assert len(result.parts) == 1 + assert isinstance(result.parts[0], ToolCallResponse) + assert result.parts[0].response == "search result: 42" + assert result.parts[0].id == "call_abc123" + + def test_tool_message_empty_content(self): + """ToolMessage with empty content should still produce ToolCallResponse.""" + msg = { + "id": ["langchain", "schema", "ToolMessage"], + "kwargs": { + "content": "", + "tool_call_id": "call_xyz", + }, + } + result = _convert_lc_message_to_input(msg) + assert result is not None + assert result.role == "tool" + assert 
class TestExtractToolDefinitions:
    """Tool definitions may live in invocation_params or in run.inputs."""

    def test_from_invocation_params_openai_format(self):
        """Tools in OpenAI format: {type: function, function: {...}}."""
        run = _FakeRun(
            extra={
                "invocation_params": {
                    "tools": [
                        {
                            "type": "function",
                            "function": {
                                "name": "get_weather",
                                "description": "Get weather",
                                "parameters": {"type": "object"},
                            },
                        },
                    ]
                }
            }
        )
        result = _extract_tool_definitions(run)
        assert len(result) == 1
        assert isinstance(result[0], FunctionToolDefinition)
        assert result[0].name == "get_weather"
        assert result[0].description == "Get weather"

    def test_from_invocation_params_flat_format(self):
        """Tools in flat format: {name, description, parameters}."""
        run = _FakeRun(
            extra={
                "invocation_params": {
                    "tools": [
                        {
                            "name": "search",
                            "description": "Search tool",
                            "parameters": {},
                        },
                    ]
                }
            }
        )
        result = _extract_tool_definitions(run)
        assert len(result) == 1
        assert result[0].name == "search"

    def test_from_inputs(self):
        """Tools in run.inputs when not in invocation_params."""
        run = _FakeRun(
            inputs={
                "tools": [
                    {
                        "type": "function",
                        "function": {
                            "name": "calculator",
                            "description": "Do math",
                            "parameters": {},
                        },
                    },
                ]
            }
        )
        result = _extract_tool_definitions(run)
        assert len(result) == 1
        assert result[0].name == "calculator"

    def test_empty_when_no_tools(self):
        """No tools anywhere -> empty list, not None."""
        run = _FakeRun(extra={}, inputs={})
        assert _extract_tool_definitions(run) == []


class TestExtractLLMInputMessages:
    """Input messages come from 'messages' (serialized) or 'prompts' (raw)."""

    def test_from_messages_field(self):
        # Serialized LangChain message format: id path + kwargs.
        run = _FakeRun(
            inputs={
                "messages": [
                    [
                        {
                            "id": ["langchain", "schema", "HumanMessage"],
                            "kwargs": {"content": "Hi"},
                        }
                    ]
                ]
            }
        )
        messages = _extract_llm_input_messages(run)
        assert len(messages) == 1
        assert messages[0].role == "user"

    def test_from_prompts_field(self):
        # Plain string prompts are wrapped into a single message.
        run = _FakeRun(inputs={"prompts": ["Tell me a joke"]})
        messages = _extract_llm_input_messages(run)
        assert len(messages) == 1
        assert messages[0].parts[0].content == "Tell me a joke"

    def test_empty_inputs(self):
        run = _FakeRun(inputs={})
        messages = _extract_llm_input_messages(run)
        assert messages == []

    def test_messages_with_tool_message(self):
        """Messages containing ToolMessage should convert to ToolCallResponse."""
        run = _FakeRun(
            inputs={
                "messages": [
                    [
                        {
                            "id": ["langchain", "schema", "HumanMessage"],
                            "kwargs": {"content": "search for x"},
                        },
                        {
                            "id": ["langchain", "schema", "ToolMessage"],
                            "kwargs": {
                                "content": "found: 42",
                                "tool_call_id": "call_123",
                            },
                        },
                    ]
                ]
            }
        )
        messages = _extract_llm_input_messages(run)
        assert len(messages) == 2
        assert messages[0].role == "user"
        assert messages[1].role == "tool"
        assert isinstance(messages[1].parts[0], ToolCallResponse)
        assert messages[1].parts[0].response == "found: 42"
        assert messages[1].parts[0].id == "call_123"


class TestExtractLLMOutputMessages:
    """Output messages come from run.outputs['generations']."""

    def test_basic_generation(self):
        run = _FakeRun(
            outputs={
                "generations": [
                    [
                        {
                            "text": "Hello world",
                            "generation_info": {"finish_reason": "stop"},
                        }
                    ]
                ]
            }
        )
        messages = _extract_llm_output_messages(run)
        assert len(messages) == 1
        assert messages[0].role == "assistant"
        assert messages[0].finish_reason == "stop"

    def test_empty_outputs(self):
        run = _FakeRun(outputs={})
        assert _extract_llm_output_messages(run) == []


class TestExtractTokenUsage:
    """Token usage is probed in several provider-specific locations."""

    def test_from_llm_output(self):
        # Canonical OpenAI-style location: llm_output.token_usage.
        run = _FakeRun(
            outputs={
                "llm_output": {
                    "token_usage": {
                        "prompt_tokens": 10,
                        "completion_tokens": 20,
                    }
                }
            }
        )
        inp, out = _extract_token_usage(run)
        assert inp == 10
        assert out == 20

    def test_alternative_keys(self):
        # Some providers use 'usage' with input_tokens/output_tokens.
        run = _FakeRun(
            outputs={
                "llm_output": {
                    "usage": {
                        "input_tokens": 5,
                        "output_tokens": 15,
                    }
                }
            }
        )
        inp, out = _extract_token_usage(run)
        assert inp == 5
        assert out == 15

    def test_from_generation_info_token_usage(self):
        """Providers that don't populate llm_output may put token_usage in generation_info."""
        run = _FakeRun(
            outputs={
                "generations": [
                    [
                        {
                            "text": "Hello",
                            "generation_info": {
                                "finish_reason": "stop",
                                "token_usage": {
                                    "input_tokens": 39,
                                    "output_tokens": 8,
                                },
                            },
                        }
                    ]
                ]
            }
        )
        inp, out = _extract_token_usage(run)
        assert inp == 39
        assert out == 8

    def test_from_generation_info_usage(self):
        """generation_info may use 'usage' key with prompt_tokens/completion_tokens."""
        run = _FakeRun(
            outputs={
                "generations": [
                    [
                        {
                            "text": "Hi",
                            "generation_info": {
                                "usage": {
                                    "prompt_tokens": 12,
                                    "completion_tokens": 6,
                                }
                            },
                        }
                    ]
                ]
            }
        )
        inp, out = _extract_token_usage(run)
        assert inp == 12
        assert out == 6

    def test_from_message_response_metadata_dict(self):
        """Token usage may be in message.kwargs.response_metadata (serialized format)."""
        run = _FakeRun(
            outputs={
                "generations": [
                    [
                        {
                            "text": "Response",
                            "message": {
                                "kwargs": {
                                    "content": "Response",
                                    "response_metadata": {
                                        "token_usage": {
                                            "prompt_tokens": 100,
                                            "completion_tokens": 25,
                                        }
                                    },
                                }
                            },
                        }
                    ]
                ]
            }
        )
        inp, out = _extract_token_usage(run)
        assert inp == 100
        assert out == 25

    def test_from_message_response_metadata_object(self):
        """Token usage may be in message.response_metadata (object format, not serialized)."""

        class _FakeMessage:
            # Mimics a live BaseMessage with a response_metadata attribute.
            response_metadata = {
                "token_usage": {
                    "prompt_tokens": 50,
                    "completion_tokens": 10,
                }
            }

        run = _FakeRun(
            outputs={
                "generations": [
                    [
                        {
                            "text": "Response",
                            "message": _FakeMessage(),
                        }
                    ]
                ]
            }
        )
        inp, out = _extract_token_usage(run)
        assert inp == 50
        assert out == 10

    def test_llm_output_takes_precedence(self):
        """When both llm_output and generation_info have token_usage, prefer llm_output."""
        run = _FakeRun(
            outputs={
                "llm_output": {
                    "token_usage": {
                        "prompt_tokens": 1,
                        "completion_tokens": 2,
                    }
                },
                "generations": [
                    [
                        {
                            "generation_info": {
                                "token_usage": {
                                    "input_tokens": 99,
                                    "output_tokens": 99,
                                }
                            }
                        }
                    ]
                ],
            }
        )
        inp, out = _extract_token_usage(run)
        assert inp == 1
        assert out == 2

    def test_no_token_usage(self):
        # Nothing present anywhere -> (None, None), not (0, 0).
        run = _FakeRun(outputs={})
        inp, out = _extract_token_usage(run)
        assert inp is None
        assert out is None


class TestExtractFinishReasons:
    """Finish reasons are collected from each generation's generation_info."""

    def test_basic(self):
        run = _FakeRun(
            outputs={
                "generations": [
                    [{"generation_info": {"finish_reason": "stop"}}]
                ]
            }
        )
        reasons = _extract_finish_reasons(run)
        assert reasons == ["stop"]

    def test_none_when_empty(self):
        run = _FakeRun(outputs={})
        assert _extract_finish_reasons(run) is None


class TestExtractResponseModel:
    """Response model name is read from llm_output.model_name."""

    def test_from_llm_output(self):
        run = _FakeRun(outputs={"llm_output": {"model_name": "gpt-4-turbo"}})
        assert _extract_response_model(run) == "gpt-4-turbo"

    def test_none_when_missing(self):
        run = _FakeRun(outputs={})
        assert _extract_response_model(run) is None


class TestSafeJson:
    """_safe_json must never raise and must bound its output length."""

    def test_basic_dict(self):
        assert '"a": 1' in _safe_json({"a": 1})

    def test_truncation(self):
        result = _safe_json({"x": "a" * 10000}, max_len=100)
        assert result.endswith("...[truncated]")
        assert len(result) <= 114  # max_len(100) + len("...[truncated]")(14)

    def test_non_serializable(self):
        # Objects without a JSON representation still yield a string.
        result = _safe_json(object())
        assert isinstance(result, str)
# Copyright The OpenTelemetry Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Tests for LangChain Instrumentor lifecycle."""

import unittest

import wrapt

from opentelemetry import trace
from opentelemetry.instrumentation.langchain import LangChainInstrumentor
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import SimpleSpanProcessor
from opentelemetry.sdk.trace.export.in_memory_span_exporter import (
    InMemorySpanExporter,
)


class TestLangChainInstrumentor(unittest.TestCase):
    """Exercises instrument()/uninstrument() wrapping of BaseCallbackManager."""

    def setUp(self):
        # Wire an in-memory exporter through a synchronous processor so
        # finished spans are observable immediately within each test.
        self.exporter = InMemorySpanExporter()
        processor = SimpleSpanProcessor(self.exporter)
        self.tracer_provider = TracerProvider()
        self.tracer_provider.add_span_processor(processor)
        # NOTE(review): set_tracer_provider only takes effect once per
        # process; later calls are no-ops — acceptable for these tests.
        trace.set_tracer_provider(self.tracer_provider)
        self.instrumentor = LangChainInstrumentor()

    def tearDown(self):
        # Best-effort cleanup: uninstrument may legitimately fail when a
        # test never instrumented, so swallow any error deliberately.
        try:
            self.instrumentor.uninstrument()
        except Exception:
            pass
        self.exporter.clear()

    def test_instrumentor_init(self):
        """Constructing the instrumentor alone must succeed."""
        self.assertIsNotNone(self.instrumentor)

    def test_instrumentation_dependencies(self):
        """Dependencies are a tuple and mention langchain_core."""
        deps = self.instrumentor.instrumentation_dependencies()
        self.assertIsInstance(deps, tuple)
        self.assertTrue(any("langchain_core" in dep for dep in deps))

    def test_instrument(self):
        """instrument() wraps BaseCallbackManager.__init__ with wrapt."""
        self.instrumentor.instrument(tracer_provider=self.tracer_provider)
        from langchain_core.callbacks import (  # noqa: PLC0415
            BaseCallbackManager,
        )

        wrapped = isinstance(BaseCallbackManager.__init__, wrapt.ObjectProxy)
        self.assertTrue(
            wrapped,
            "BaseCallbackManager.__init__ should be wrapped after instrument",
        )

    def test_uninstrument(self):
        """uninstrument() restores the original __init__."""
        self.instrumentor.instrument(tracer_provider=self.tracer_provider)
        self.instrumentor.uninstrument()
        from langchain_core.callbacks import (  # noqa: PLC0415
            BaseCallbackManager,
        )

        still_wrapped = isinstance(
            BaseCallbackManager.__init__, wrapt.ObjectProxy
        )
        self.assertFalse(
            still_wrapped,
            "BaseCallbackManager.__init__ should be restored after uninstrument",
        )

    def test_uninstrument_without_instrument(self):
        """uninstrument() before instrument() must not raise."""
        try:
            self.instrumentor.uninstrument()
        except Exception as e:
            self.fail(
                f"uninstrument() raised an exception when not instrumented: {e}"
            )

    def test_instrument_multiple_times(self):
        """Repeated instrument() calls keep the wrapper in place."""
        self.instrumentor.instrument(tracer_provider=self.tracer_provider)
        self.instrumentor.instrument(tracer_provider=self.tracer_provider)
        from langchain_core.callbacks import (  # noqa: PLC0415
            BaseCallbackManager,
        )

        self.assertTrue(
            isinstance(BaseCallbackManager.__init__, wrapt.ObjectProxy),
            "BaseCallbackManager.__init__ should still be wrapped",
        )


if __name__ == "__main__":
    unittest.main()
# Copyright The OpenTelemetry Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Integration tests for LangChain Instrumentor with RetrievalQA and ChatOpenAI.

Uses instrument mode with content capture (SPAN_ONLY) to verify input/output
and semantic convention attributes.
"""

from __future__ import annotations

import asyncio
import json
import random
from contextlib import suppress
from itertools import count
from typing import Any, Dict, Iterable, Iterator, List, Tuple

import numpy as np
import openai
import pytest
import respx
from httpx import AsyncByteStream, Response, SyncByteStream
from langchain.chains import RetrievalQA
from langchain_community.embeddings import FakeEmbeddings
from langchain_community.retrievers import KNNRetriever
from langchain_core.prompts import PromptTemplate
from langchain_openai import ChatOpenAI

from opentelemetry.instrumentation.langchain.internal.semconv import (
    GEN_AI_OPERATION_NAME,
    GEN_AI_RETRIEVAL_DOCUMENTS,
    GEN_AI_RETRIEVAL_QUERY,
    GEN_AI_SPAN_KIND,
    INPUT_VALUE,
    OUTPUT_VALUE,
)
from opentelemetry.sdk.trace.export.in_memory_span_exporter import (
    InMemorySpanExporter,
)
from opentelemetry.semconv._incubating.attributes import (
    gen_ai_attributes as GenAIAttributes,
)
from opentelemetry.trace import StatusCode


def _randstr() -> str:
    """Return a random unique string; used so fixtures never collide."""
    return str(random.random())


class MockByteStream(SyncByteStream, AsyncByteStream):
    """Mock byte stream for streaming responses."""

    def __init__(self, byte_stream: Iterable[bytes]):
        self._byte_stream = byte_stream

    def __iter__(self) -> Iterator[bytes]:
        # Synchronous iteration path used by non-async httpx clients.
        for byte_string in self._byte_stream:
            yield byte_string

    async def __aiter__(self):
        # Async-generator __aiter__ — httpx consumes this directly.
        for byte_string in self._byte_stream:
            yield byte_string


# ---------------------------------------------------------------------------
# Fixtures (use conftest's instrument, span_exporter, metric_reader)
# ---------------------------------------------------------------------------


@pytest.fixture
def documents() -> List[str]:
    # Two random doc texts so retrieval output is recognizable in spans.
    return [_randstr(), _randstr()]


@pytest.fixture
def chat_completion_mock_stream() -> Tuple[List[bytes], List[Dict[str, Any]]]:
    # (raw SSE chunks, the fully-assembled message they represent).
    return (
        [
            b'data: {"choices": [{"delta": {"role": "assistant"}, "index": 0}]}\n\n',
            b'data: {"choices": [{"delta": {"content": "A"}, "index": 0}]}\n\n',
            b'data: {"choices": [{"delta": {"content": "B"}, "index": 0}]}\n\n',
            b'data: {"choices": [{"delta": {"content": "C"}, "index": 0}]}\n\n',
            b"data: [DONE]\n",
        ],
        [{"role": "assistant", "content": "ABC"}],
    )


@pytest.fixture
def completion_usage() -> Dict[str, Any]:
    # Random token counts so assertions cannot pass by coincidence.
    prompt_tokens = random.randint(1, 1000)
    completion_tokens = random.randint(1, 1000)
    return {
        "prompt_tokens": prompt_tokens,
        "completion_tokens": completion_tokens,
        "total_tokens": prompt_tokens + completion_tokens,
    }


@pytest.fixture
def model_name() -> str:
    return _randstr()


@pytest.fixture(autouse=True)
def openai_api_key(monkeypatch: pytest.MonkeyPatch) -> None:
    # Dummy key: requests are intercepted by respx, never sent to OpenAI.
    monkeypatch.setenv("OPENAI_API_KEY", "sk-")


@pytest.fixture(scope="module")
def seed() -> Iterator[int]:
    """Rolling seeds for repeatable debugging."""
    return count()


@pytest.fixture(autouse=True)
def set_seed(seed: Iterator[int]) -> Iterator[None]:
    # Each test gets the next deterministic seed from the module counter.
    random.seed(next(seed))
    yield


# ---------------------------------------------------------------------------
# Tests
# ---------------------------------------------------------------------------
@pytest.mark.parametrize("is_async", [False, True])
@pytest.mark.parametrize("is_stream", [False, True])
@pytest.mark.parametrize("status_code", [200, 400])
def test_retrieval_qa_chain_spans(
    is_async: bool,
    is_stream: bool,
    status_code: int,
    respx_mock: respx.MockRouter,
    instrument,
    span_exporter: InMemorySpanExporter,
    documents: List[str],
    chat_completion_mock_stream: Tuple[List[bytes], List[Dict[str, Any]]],
    model_name: str,
    completion_usage: Dict[str, Any],
) -> None:
    """Test RetrievalQA chain produces correct spans with input/output attributes."""
    question = _randstr()
    template = "{context}{question}"
    prompt = PromptTemplate(
        input_variables=["context", "question"], template=template
    )
    output_messages: List[Dict[str, Any]] = (
        chat_completion_mock_stream[1]
        if is_stream
        else [{"role": _randstr(), "content": _randstr()}]
    )
    url = "https://api.openai.com/v1/chat/completions"
    # Streaming responses are served as SSE bytes; non-streaming as JSON.
    respx_kwargs: Dict[str, Any] = (
        {"stream": MockByteStream(chat_completion_mock_stream[0])}
        if is_stream
        else {
            "json": {
                "choices": [
                    {
                        "index": i,
                        "message": message,
                        "finish_reason": "stop",
                    }
                    for i, message in enumerate(output_messages)
                ],
                "model": model_name,
                "usage": completion_usage,
            }
        }
    )
    respx_mock.post(url).mock(
        return_value=Response(status_code=status_code, **respx_kwargs)
    )
    chat_model = ChatOpenAI(model_name="gpt-3.5-turbo", streaming=is_stream)
    retriever = KNNRetriever(
        index=np.ones((len(documents), 2)),
        texts=documents,
        embeddings=FakeEmbeddings(size=2),
    )
    rqa = RetrievalQA.from_chain_type(
        llm=chat_model,
        retriever=retriever,
        chain_type_kwargs={"prompt": prompt},
    )
    # The 400 case raises; suppress so span assertions below still run.
    with suppress(openai.BadRequestError):
        if is_async:
            asyncio.run(rqa.ainvoke({"query": question}))
        else:
            rqa.invoke({"query": question})

    spans = span_exporter.get_finished_spans()
    spans_by_name = {span.name: span for span in spans}

    # Chain spans use "chain {run.name}" format
    rqa_span = spans_by_name.get("chain RetrievalQA")
    assert rqa_span is not None, (
        f"Expected chain RetrievalQA span, got: {list(spans_by_name.keys())}"
    )
    assert rqa_span.parent is None
    rqa_attrs = dict(rqa_span.attributes or {})
    assert rqa_attrs.pop(GEN_AI_OPERATION_NAME, None) == "chain"
    assert rqa_attrs.pop(GEN_AI_SPAN_KIND, None) == "CHAIN"
    # INPUT_VALUE is JSON; RetrievalQA input is {"query": question}
    input_val = rqa_attrs.pop(INPUT_VALUE, None)
    assert input_val is not None
    input_parsed = (
        json.loads(input_val) if isinstance(input_val, str) else input_val
    )
    assert input_parsed.get("query") == question
    if status_code == 200:
        assert rqa_span.status.status_code == StatusCode.UNSET
        out_val = rqa_attrs.pop(OUTPUT_VALUE, None)
        assert out_val is not None
        out_parsed = (
            json.loads(out_val) if isinstance(out_val, str) else out_val
        )
        assert out_parsed.get("result") == output_messages[0]["content"]
    elif status_code == 400:
        assert rqa_span.status.status_code == StatusCode.ERROR
        assert len(rqa_span.events) >= 1
        assert rqa_span.events[0].name == "exception"
    # After popping known keys, only optional metadata may remain.
    assert not rqa_attrs or set(rqa_attrs.keys()) <= {"metadata"}

    # StuffDocumentsChain
    sd_span = spans_by_name.get("chain StuffDocumentsChain")
    assert sd_span is not None
    assert sd_span.parent is not None
    assert sd_span.parent.span_id == rqa_span.context.span_id
    sd_attrs = dict(sd_span.attributes or {})
    assert sd_attrs.pop(GEN_AI_OPERATION_NAME, None) == "chain"
    assert sd_attrs.pop(GEN_AI_SPAN_KIND, None) == "CHAIN"
    assert sd_attrs.pop(INPUT_VALUE, None) is not None
    if status_code == 200:
        assert sd_span.status.status_code == StatusCode.UNSET
        assert sd_attrs.pop(OUTPUT_VALUE, None) is not None
    elif status_code == 400:
        assert sd_span.status.status_code == StatusCode.ERROR
    assert not sd_attrs or set(sd_attrs.keys()) <= {"metadata"}

    # Retriever span: name is "retrieve_documents"
    retriever_span = spans_by_name.get("retrieve_documents")
    assert retriever_span is not None
    assert retriever_span.parent is not None
    assert retriever_span.parent.span_id == rqa_span.context.span_id
    retriever_attrs = dict(retriever_span.attributes or {})
    assert retriever_attrs.pop(GEN_AI_SPAN_KIND, None) == "RETRIEVER"
    assert retriever_attrs.pop(GEN_AI_RETRIEVAL_QUERY, None) == question
    docs_val = retriever_attrs.pop(GEN_AI_RETRIEVAL_DOCUMENTS, None)
    assert docs_val is not None
    for text in documents:
        assert text in docs_val

    # LLMChain
    llm_chain_span = spans_by_name.get("chain LLMChain")
    assert llm_chain_span is not None
    assert llm_chain_span.parent is not None
    assert llm_chain_span.parent.span_id == sd_span.context.span_id
    llm_chain_attrs = dict(llm_chain_span.attributes or {})
    assert llm_chain_attrs.pop(GEN_AI_SPAN_KIND, None) == "CHAIN"
    llm_input = llm_chain_attrs.get(INPUT_VALUE)
    assert llm_input is not None
    llm_input_parsed = (
        json.loads(llm_input) if isinstance(llm_input, str) else llm_input
    )
    for var in ["question", "context"]:
        assert var in llm_input_parsed
    if status_code == 200:
        assert llm_chain_attrs.pop(OUTPUT_VALUE, None) is not None
    elif status_code == 400:
        assert llm_chain_span.status.status_code == StatusCode.ERROR

    # ChatOpenAI LLM span: "chat {model_name}"
    oai_span = spans_by_name.get("chat gpt-3.5-turbo")
    assert oai_span is not None
    assert oai_span.parent is not None
    assert oai_span.parent.span_id == llm_chain_span.context.span_id
    oai_attrs = dict(oai_span.attributes or {})
    assert oai_attrs.pop(GEN_AI_SPAN_KIND, None) == "LLM"
    assert (
        oai_attrs.pop(GenAIAttributes.GEN_AI_REQUEST_MODEL, None) is not None
    )
    assert (
        GenAIAttributes.GEN_AI_INPUT_MESSAGES in oai_attrs
        or "input" in str(oai_attrs).lower()
    )
    if status_code == 200:
        assert oai_span.status.status_code == StatusCode.UNSET, (
            f"Expected UNSET, got {oai_span.status.status_code}"
        )
        assert (
            GenAIAttributes.GEN_AI_OUTPUT_MESSAGES in oai_attrs
            or "output" in str(oai_attrs).lower()
        )
        if not is_stream:
            # Finish reasons and token usage are only reported for
            # non-streaming responses (the SSE mock carries no usage).
            assert (
                oai_attrs.pop(
                    GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS, None
                )
                is not None
            )
            assert (
                oai_attrs.pop(GenAIAttributes.GEN_AI_USAGE_INPUT_TOKENS, None)
                == completion_usage["prompt_tokens"]
            )
            assert (
                oai_attrs.pop(GenAIAttributes.GEN_AI_USAGE_OUTPUT_TOKENS, None)
                == completion_usage["completion_tokens"]
            )
    elif status_code == 400:
        assert oai_span.status.status_code == StatusCode.ERROR


@pytest.mark.parametrize("is_async", [False, True])
@pytest.mark.parametrize("is_stream", [False, True])
@pytest.mark.parametrize("status_code", [200, 400])
def test_retrieval_qa_metrics(
    is_async: bool,
    is_stream: bool,
    status_code: int,
    respx_mock: respx.MockRouter,
    instrument,
    metric_reader,
    documents: List[str],
    chat_completion_mock_stream: Tuple[List[bytes], List[Dict[str, Any]]],
    model_name: str,
    completion_usage: Dict[str, Any],
) -> None:
    """Test that metrics are recorded for RetrievalQA chain."""
    question = _randstr()
    template = "{context}{question}"
    prompt = PromptTemplate(
        input_variables=["context", "question"], template=template
    )
    output_messages: List[Dict[str, Any]] = (
        chat_completion_mock_stream[1]
        if is_stream
        else [{"role": _randstr(), "content": _randstr()}]
    )
    url = "https://api.openai.com/v1/chat/completions"
    respx_kwargs: Dict[str, Any] = (
        {"stream": MockByteStream(chat_completion_mock_stream[0])}
        if is_stream
        else {
            "json": {
                "choices": [
                    {
                        "index": i,
                        "message": message,
                        "finish_reason": "stop",
                    }
                    for i, message in enumerate(output_messages)
                ],
                "model": model_name,
                "usage": completion_usage,
            }
        }
    )
    respx_mock.post(url).mock(
        return_value=Response(status_code=status_code, **respx_kwargs)
    )
    chat_model = ChatOpenAI(model_name="gpt-3.5-turbo", streaming=is_stream)
    retriever = KNNRetriever(
        index=np.ones((len(documents), 2)),
        texts=documents,
        embeddings=FakeEmbeddings(size=2),
    )
    rqa = RetrievalQA.from_chain_type(
        llm=chat_model,
        retriever=retriever,
        chain_type_kwargs={"prompt": prompt},
    )
    with suppress(openai.BadRequestError):
        if is_async:
            asyncio.run(rqa.ainvoke({"query": question}))
        else:
            rqa.invoke({"query": question})

    metric_reader.force_flush()
    metric_data = metric_reader.get_metrics_data()
    # Best-effort check: some parameterizations legitimately emit no
    # metrics, so bail out quietly rather than fail.
    if metric_data is None or not metric_data.resource_metrics:
        return
    scope_metrics = metric_data.resource_metrics[0].scope_metrics
    if not scope_metrics:
        return
    metric_list = scope_metrics[0].metrics
    assert len(metric_list) >= 1


def test_chain_metadata(
    respx_mock: respx.MockRouter,
    instrument,
    span_exporter: InMemorySpanExporter,
    completion_usage: Dict[str, Any],
) -> None:
    """Test that chain metadata is captured in span attributes."""
    url = "https://api.openai.com/v1/chat/completions"
    respx_mock.post(url).mock(
        return_value=Response(
            status_code=200,
            json={
                "choices": [
                    {
                        "index": 0,
                        "message": {
                            "role": "assistant",
                            "content": "nock nock",
                        },
                        "finish_reason": "stop",
                    }
                ],
                "model": "gpt-3.5-turbo",
                "usage": completion_usage,
            },
        )
    )
    prompt_template = "Tell me a {adjective} joke"
    prompt = PromptTemplate(
        input_variables=["adjective"], template=prompt_template
    )
    llm = ChatOpenAI()
    chain = prompt | llm
    chain = chain.with_config({"metadata": {"category": "jokes"}})
    chain.invoke({"adjective": "funny"})

    spans = span_exporter.get_finished_spans()
    spans_by_name = {span.name: span for span in spans}

    # LCEL chain: "chain RunnableSequence" or similar
    chain_span = None
    for name, span in spans_by_name.items():
        if name.startswith("chain ") and span.attributes:
            chain_span = span
            break
    assert chain_span is not None
    assert chain_span.attributes
    metadata_val = chain_span.attributes.get("gen_ai.chain.metadata")
    # Metadata capture is optional; only assert its content when present.
    if metadata_val is not None:
        assert "jokes" in str(metadata_val) or "category" in str(metadata_val)


def test_chain_exception_event(
    instrument,
    span_exporter: InMemorySpanExporter,
) -> None:
    """Test that chain exceptions are recorded as span events."""

    class MyCustomError(Exception):
        pass

    class ErrorLLM(ChatOpenAI):
        # Always fails, to drive the error path through the callbacks.
        def _generate(self, *args, **kwargs):
            raise MyCustomError("mock error")

    prompt = PromptTemplate(
        input_variables=["question"], template="{question}"
    )
    llm = ErrorLLM()
    chain = prompt | llm
    with pytest.raises(MyCustomError):
        chain.invoke({"question": "test?"})

    spans = span_exporter.get_finished_spans()
    # Find span with exception event (may be chain or LLM span)
    for span in spans:
        if len(span.events) >= 1 and span.events[0].name == "exception":
            assert span.status.status_code == StatusCode.ERROR
            exc_type = span.events[0].attributes.get("exception.type", "")
            exc_msg = span.events[0].attributes.get("exception.message", "")
            # Exception type may be "Exception" or "MyCustomError" depending on handler
            assert "mock error" in str(exc_msg) or "MyCustomError" in str(
                exc_type
            )
            return
    pytest.fail("No span with exception event found")
"""Tests for LLM span creation and attributes."""

import json
from typing import Any, List, Optional

import pytest
from langchain_core.callbacks import CallbackManagerForLLMRun
from langchain_core.language_models.chat_models import BaseChatModel
from langchain_core.messages import (
    AIMessage,
    BaseMessage,
    HumanMessage,
    SystemMessage,
)
from langchain_core.outputs import ChatGeneration, ChatResult

from opentelemetry.semconv._incubating.attributes import (
    gen_ai_attributes as GenAIAttributes,
)
from opentelemetry.trace import StatusCode


class FakeChatModel(BaseChatModel):
    """A fake chat model for testing."""

    # Pydantic model fields (BaseChatModel is a pydantic model), so the
    # list default is a per-instance field, not a shared mutable default.
    model_name: str = "fake-model"
    responses: List[str] = ["Hello from fake model"]

    @property
    def _llm_type(self) -> str:
        return "fake-chat-model"

    def _generate(
        self,
        messages: List[BaseMessage],
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> ChatResult:
        # Fixed token usage (10/5) so token-usage tests can assert exact values.
        response = self.responses[0] if self.responses else "default"
        message = AIMessage(content=response)
        generation = ChatGeneration(
            message=message,
            generation_info={"finish_reason": "stop"},
        )
        return ChatResult(
            generations=[generation],
            llm_output={
                "token_usage": {
                    "prompt_tokens": 10,
                    "completion_tokens": 5,
                    "total_tokens": 15,
                },
                "model_name": self.model_name,
            },
        )

    @property
    def _identifying_params(self) -> dict:
        return {"model_name": self.model_name}


class FakeErrorChatModel(BaseChatModel):
    """A fake chat model that always raises errors."""

    @property
    def _llm_type(self) -> str:
        return "fake-error-chat-model"

    def _generate(
        self,
        messages: List[BaseMessage],
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> ChatResult:
        raise ValueError("LLM error for testing")

    @property
    def _identifying_params(self) -> dict:
        return {}


def _find_chat_spans(span_exporter):
    """Return finished spans whose name marks them as chat/LLM spans."""
    spans = span_exporter.get_finished_spans()
    return [s for s in spans if "chat" in s.name.lower()]


class TestLLMSpanCreation:
    """Basic span creation and core attributes for chat model calls."""

    def test_chat_model_creates_span(self, instrument, span_exporter):
        llm = FakeChatModel()
        result = llm.invoke([HumanMessage(content="Hi")])
        assert isinstance(result, AIMessage)

        chat_spans = _find_chat_spans(span_exporter)
        assert len(chat_spans) >= 1

    def test_llm_span_has_model_name(self, instrument, span_exporter):
        llm = FakeChatModel(model_name="test-gpt")
        llm.invoke([HumanMessage(content="test")])

        chat_spans = _find_chat_spans(span_exporter)
        assert len(chat_spans) >= 1
        span = chat_spans[0]
        # Model name may surface in the span name or in an attribute value.
        assert "test-gpt" in span.name or any(
            "test-gpt" in str(v) for v in span.attributes.values()
        )

    def test_llm_span_operation_name(self, instrument, span_exporter):
        llm = FakeChatModel()
        llm.invoke([HumanMessage(content="Hi")])

        chat_spans = _find_chat_spans(span_exporter)
        assert len(chat_spans) >= 1
        attrs = dict(chat_spans[0].attributes)
        assert attrs.get(GenAIAttributes.GEN_AI_OPERATION_NAME) == "chat"

    def test_llm_span_token_usage(self, instrument, span_exporter):
        llm = FakeChatModel()
        llm.invoke([HumanMessage(content="count tokens")])

        chat_spans = _find_chat_spans(span_exporter)
        assert len(chat_spans) >= 1
        attrs = dict(chat_spans[0].attributes)
        # Matches the fixed 10/5 usage emitted by FakeChatModel._generate.
        assert attrs.get(GenAIAttributes.GEN_AI_USAGE_INPUT_TOKENS) == 10
        assert attrs.get(GenAIAttributes.GEN_AI_USAGE_OUTPUT_TOKENS) == 5

    def test_llm_span_finish_reasons(self, instrument, span_exporter):
        llm = FakeChatModel()
        llm.invoke([HumanMessage(content="Hi")])

        chat_spans = _find_chat_spans(span_exporter)
        assert len(chat_spans) >= 1
        attrs = dict(chat_spans[0].attributes)
        finish_reasons = attrs.get(
            GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS
        )
        assert finish_reasons is not None
        # OTel stores sequence attributes as tuples; normalize for the check.
        if isinstance(finish_reasons, tuple):
            finish_reasons = list(finish_reasons)
        assert "stop" in finish_reasons

    def test_llm_span_on_error(self, instrument, span_exporter):
        llm = FakeErrorChatModel()
        with pytest.raises(ValueError, match="LLM error"):
            llm.invoke([HumanMessage(content="fail")])

        spans = span_exporter.get_finished_spans()
        assert len(spans) >= 1
        error_spans = [
            s for s in spans if s.status.status_code == StatusCode.ERROR
        ]
        assert len(error_spans) >= 1


class TestLLMInputOutputContent:
    """Verify that input/output messages are captured in span attributes."""

    def test_input_messages_captured(self, instrument, span_exporter):
        llm = FakeChatModel()
        llm.invoke([HumanMessage(content="Hello world")])

        chat_spans = _find_chat_spans(span_exporter)
        assert len(chat_spans) >= 1
        attrs = dict(chat_spans[0].attributes)

        assert GenAIAttributes.GEN_AI_INPUT_MESSAGES in attrs, (
            "LLM span missing gen_ai.input_messages"
        )
        input_raw = attrs[GenAIAttributes.GEN_AI_INPUT_MESSAGES]
        input_msgs = json.loads(input_raw)
        assert isinstance(input_msgs, list)
        assert len(input_msgs) >= 1

        # Expect the serialized form {role, parts: [{type, content}, ...]}.
        has_user_msg = any(
            m.get("role") == "user"
            and any(
                p.get("type") == "text"
                and "Hello world" in p.get("content", "")
                for p in m.get("parts", [])
            )
            for m in input_msgs
        )
        assert has_user_msg, (
            f"Expected user message with 'Hello world' in input_messages, got: {input_raw}"
        )

    def test_output_messages_captured(self, instrument, span_exporter):
        llm = FakeChatModel(responses=["Test response from LLM"])
        llm.invoke([HumanMessage(content="Hi")])

        chat_spans = _find_chat_spans(span_exporter)
        assert len(chat_spans) >= 1
        attrs = dict(chat_spans[0].attributes)

        assert GenAIAttributes.GEN_AI_OUTPUT_MESSAGES in attrs, (
            "LLM span missing gen_ai.output_messages"
        )
        output_raw = attrs[GenAIAttributes.GEN_AI_OUTPUT_MESSAGES]
        output_msgs = json.loads(output_raw)
        assert isinstance(output_msgs, list)
        assert len(output_msgs) >= 1

        has_assistant_msg = any(
            m.get("role") == "assistant"
            and any(
                p.get("type") == "text"
                and "Test response from LLM" in p.get("content", "")
                for p in m.get("parts", [])
            )
            for m in output_msgs
        )
        assert has_assistant_msg, (
            f"Expected assistant message with 'Test response from LLM', got: {output_raw}"
        )

    def test_multi_message_input(self, instrument, span_exporter):
        """Verify system + user multi-turn messages are captured."""
        llm = FakeChatModel()
        llm.invoke(
            [
                SystemMessage(content="You are a helpful assistant."),
                HumanMessage(content="What is Python?"),
            ]
        )

        chat_spans = _find_chat_spans(span_exporter)
        assert len(chat_spans) >= 1
        attrs = dict(chat_spans[0].attributes)

        input_raw = attrs.get(GenAIAttributes.GEN_AI_INPUT_MESSAGES, "[]")
        input_msgs = json.loads(input_raw)
        assert len(input_msgs) >= 2, (
            f"Expected at least 2 input messages, got {len(input_msgs)}: {input_raw}"
        )

        roles = [m.get("role") for m in input_msgs]
        assert "system" in roles, f"Missing system role in {roles}"
        assert "user" in roles, f"Missing user role in {roles}"

    def test_no_content_when_disabled(
        self, instrument_no_content, span_exporter
    ):
        """When content capture is disabled, messages should NOT appear in span attributes."""
        llm = FakeChatModel()
        llm.invoke([HumanMessage(content="secret data")])

        chat_spans = _find_chat_spans(span_exporter)
        assert len(chat_spans) >= 1
        attrs = dict(chat_spans[0].attributes)

        assert GenAIAttributes.GEN_AI_INPUT_MESSAGES not in attrs, (
            "Input messages should NOT be captured when content capture is disabled"
        )
        assert GenAIAttributes.GEN_AI_OUTPUT_MESSAGES not in attrs, (
            "Output messages should NOT be captured when content capture is disabled"
        )

        # Non-content attributes must still be present.
        assert attrs.get(GenAIAttributes.GEN_AI_OPERATION_NAME) == "chat"
        assert GenAIAttributes.GEN_AI_USAGE_INPUT_TOKENS in attrs


class TestLLMToolDefinitions:
    """Verify tool definitions are captured when LLM uses bind_tools."""

    def test_tool_definitions_captured(
        self, instrument, span_exporter, respx_mock, monkeypatch
    ):
        """When LLM uses bind_tools, gen_ai.tool.definitions should appear."""
        monkeypatch.setenv("OPENAI_API_KEY", "sk-test")

        from langchain_core.tools import tool  # noqa: PLC0415

        @tool
        def get_weather(city: str) -> str:
            """Get weather for a city."""
            return f"Weather in {city}"

        import httpx  # noqa: PLC0415

        # Mock the OpenAI endpoint so no real request leaves the test.
        respx_mock.post("https://api.openai.com/v1/chat/completions").mock(
            return_value=httpx.Response(
                status_code=200,
                json={
                    "choices": [
                        {
                            "index": 0,
                            "message": {
                                "role": "assistant",
                                "content": "I'll check the weather.",
                            },
                            "finish_reason": "stop",
                        }
                    ],
                    "model": "gpt-3.5-turbo",
                    "usage": {
                        "prompt_tokens": 10,
                        "completion_tokens": 5,
                        "total_tokens": 15,
                    },
                },
            )
        )

        from langchain_openai import ChatOpenAI  # noqa: PLC0415

        llm = ChatOpenAI(model="gpt-3.5-turbo")
        llm_with_tools = llm.bind_tools([get_weather])
        llm_with_tools.invoke([HumanMessage(content="What's the weather?")])

        chat_spans = _find_chat_spans(span_exporter)
        assert len(chat_spans) >= 1
        attrs = dict(chat_spans[0].attributes)

        tool_defs_key = "gen_ai.tool.definitions"
        assert tool_defs_key in attrs, (
            f"Expected {tool_defs_key} in span attributes when using bind_tools, "
            f"got: {list(attrs.keys())}"
        )
        tool_defs = json.loads(attrs[tool_defs_key])
        assert isinstance(tool_defs, list)
        assert len(tool_defs) >= 1
        assert any(t.get("name") == "get_weather" for t in tool_defs), (
            f"Expected get_weather in tool_definitions, got: {tool_defs}"
        )


class TestLLMMultipleCalls:
    """Each invocation must produce its own span."""

    def test_multiple_calls_create_multiple_spans(
        self, instrument, span_exporter
    ):
        llm = FakeChatModel()
        llm.invoke([HumanMessage(content="first")])
        llm.invoke([HumanMessage(content="second")])

        chat_spans = _find_chat_spans(span_exporter)
        assert len(chat_spans) >= 2
b/instrumentation-loongsuite/loongsuite-instrumentation-langchain/tests/test_react_step_patch.py new file mode 100644 index 000000000..d4eeefd8b --- /dev/null +++ b/instrumentation-loongsuite/loongsuite-instrumentation-langchain/tests/test_react_step_patch.py @@ -0,0 +1,259 @@ +# Copyright The OpenTelemetry Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for ReAct Step instrumentation patch. + +Verifies that AgentExecutor._iter_next_step and _aiter_next_step are patched, +that ReAct Step spans are created with correct attributes, and that the +span hierarchy (Agent > ReAct Step > LLM/Tool) is correct. 
+""" + +import unittest +from typing import Any, List, Optional + +import pytest +from langchain_core.callbacks import CallbackManagerForLLMRun +from langchain_core.language_models.chat_models import BaseChatModel +from langchain_core.messages import AIMessage, BaseMessage +from langchain_core.outputs import ChatGeneration, ChatResult + +from opentelemetry.instrumentation.langchain import LangChainInstrumentor + + +class _FakeChatModel(BaseChatModel): + """Minimal fake chat model for ReAct agent testing.""" + + responses: List[str] = ["Thought: I have the answer.\nFinal Answer: 42"] + + @property + def _llm_type(self) -> str: + return "fake-chat-model" + + def _generate( + self, + messages: List[BaseMessage], + stop: Optional[List[str]] = None, + run_manager: Optional[CallbackManagerForLLMRun] = None, + **kwargs: Any, + ) -> ChatResult: + response = self.responses[0] if self.responses else "default" + message = AIMessage(content=response) + generation = ChatGeneration( + message=message, + generation_info={"finish_reason": "stop"}, + ) + return ChatResult( + generations=[generation], + llm_output={"model_name": "fake"}, + ) + + @property + def _identifying_params(self) -> dict: + return {} + + +def _get_agent_executor_classes(): + """Mirror of _get_agent_executor_classes from __init__.py for testing.""" + classes = [] + try: + from langchain.agents import AgentExecutor # noqa: PLC0415 + + classes.append(AgentExecutor) + except ImportError: + pass + try: + from langchain_classic.agents import AgentExecutor # noqa: PLC0415 + + if AgentExecutor not in classes: + classes.append(AgentExecutor) + except ImportError: + pass + return classes + + +class TestReActStepPatchApplied(unittest.TestCase): + """Verify the AgentExecutor patch is applied and restored.""" + + def setUp(self): + self.instrumentor = LangChainInstrumentor() + + def tearDown(self): + try: + self.instrumentor.uninstrument() + except Exception: + pass + + def 
test_agent_executor_patch_applied_after_instrument(self): + """AgentExecutor._iter_next_step should be wrapped after instrument.""" + classes = _get_agent_executor_classes() + if not classes: + pytest.skip( + "AgentExecutor not available (langchain not installed)" + ) + + self.instrumentor.instrument() + + # At least one class should have our wrapper + patched = [ + c + for c in classes + if c._iter_next_step.__name__ == "patched_iter_next_step" + ] + assert patched, ( + f"Expected at least one patched AgentExecutor, got: " + f"{[c._iter_next_step.__name__ for c in classes]}" + ) + + def test_agent_executor_patch_restored_after_uninstrument(self): + """AgentExecutor._iter_next_step should be original after uninstrument.""" + classes = _get_agent_executor_classes() + if not classes: + pytest.skip( + "AgentExecutor not available (langchain not installed)" + ) + + self.instrumentor.instrument() + self.instrumentor.uninstrument() + + # All should be restored to original + for cls in classes: + assert cls._iter_next_step.__name__ == "_iter_next_step", ( + f"Expected _iter_next_step after uninstrument, " + f"got {cls._iter_next_step.__name__}" + ) + + +class TestReActStepInstrumentationLogs(unittest.TestCase): + """Verify agent runs without crash (legacy test, now spans are primary).""" + + def setUp(self): + self.instrumentor = LangChainInstrumentor() + + def tearDown(self): + try: + self.instrumentor.uninstrument() + except Exception: + pass + + def test_react_step_agent_invoke_runs(self): + """When agent invokes, it should complete without error.""" + classes = _get_agent_executor_classes() + if not classes: + pytest.skip( + "AgentExecutor not available (langchain not installed)" + ) + AgentExecutor = classes[0] + + try: + from langchain.agents import create_react_agent # noqa: PLC0415 + from langchain_core.prompts import ( # noqa: PLC0415 + ChatPromptTemplate, + ) + from langchain_core.tools import tool # noqa: PLC0415 + except ImportError: + 
pytest.skip("create_react_agent or tools not available") + + self.instrumentor.instrument() + + @tool + def dummy_tool(query: str) -> str: + """A dummy tool for testing.""" + return f"result: {query}" + + llm = _FakeChatModel( + responses=["Thought: I have the answer.\nFinal Answer: 42"] + ) + prompt = ChatPromptTemplate.from_messages( + [ + ( + "human", + "Question: {input}\n\n" + "Tools: {tools}\nTool names: {tool_names}\n\n" + "Thought:{agent_scratchpad}", + ) + ] + ) + agent = create_react_agent(llm, [dummy_tool], prompt) + agent_executor = AgentExecutor( + agent=agent, + tools=[dummy_tool], + handle_parsing_errors=True, + max_iterations=2, + ) + + result = agent_executor.invoke({"input": "What is 6*7?"}) + + assert "output" in result or "result" in str(result) + + +def test_react_step_spans_on_agent_invoke(instrument, span_exporter): + """ReAct Step spans should be created with correct attributes.""" + classes = _get_agent_executor_classes() + if not classes: + pytest.skip("AgentExecutor not available (langchain not installed)") + AgentExecutor = classes[0] + + try: + from langchain.agents import create_react_agent # noqa: PLC0415 + from langchain_core.prompts import ChatPromptTemplate # noqa: PLC0415 + from langchain_core.tools import tool # noqa: PLC0415 + except ImportError: + pytest.skip("create_react_agent or tools not available") + + @tool + def dummy_tool(query: str) -> str: + """A dummy tool for testing.""" + return f"result: {query}" + + llm = _FakeChatModel( + responses=["Thought: I have the answer.\nFinal Answer: 42"] + ) + prompt = ChatPromptTemplate.from_messages( + [ + ( + "human", + "Question: {input}\n\n" + "Tools: {tools}\nTool names: {tool_names}\n\n" + "Thought:{agent_scratchpad}", + ) + ] + ) + agent = create_react_agent(llm, [dummy_tool], prompt) + agent_executor = AgentExecutor( + agent=agent, + tools=[dummy_tool], + handle_parsing_errors=True, + max_iterations=2, + ) + + agent_executor.invoke({"input": "What is 6*7?"}) + + spans = 
span_exporter.get_finished_spans() + react_step_spans = [ + s for s in spans if s.attributes.get("gen_ai.span.kind") == "STEP" + ] + assert len(react_step_spans) >= 1, ( + f"Expected at least 1 ReAct Step span, got: {[s.name for s in spans]}" + ) + + for step_span in react_step_spans: + assert step_span.name == "react step" + assert step_span.attributes.get("gen_ai.operation.name") == "react" + assert step_span.attributes.get("gen_ai.react.round") is not None + assert ( + step_span.attributes.get("gen_ai.react.finish_reason") is not None + ) + + last_step = react_step_spans[-1] + assert last_step.attributes.get("gen_ai.react.finish_reason") == "stop" diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-langchain/tests/test_retriever_spans.py b/instrumentation-loongsuite/loongsuite-instrumentation-langchain/tests/test_retriever_spans.py new file mode 100644 index 000000000..ff7fdfadb --- /dev/null +++ b/instrumentation-loongsuite/loongsuite-instrumentation-langchain/tests/test_retriever_spans.py @@ -0,0 +1,133 @@ +# Copyright The OpenTelemetry Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Tests for Retriever span creation and attributes.""" + +from typing import List + +import pytest +from langchain_core.callbacks import CallbackManagerForRetrieverRun +from langchain_core.documents import Document +from langchain_core.retrievers import BaseRetriever + +from opentelemetry.instrumentation.langchain.internal.semconv import ( + GEN_AI_RETRIEVAL_DOCUMENTS, + GEN_AI_RETRIEVAL_QUERY, +) +from opentelemetry.trace import StatusCode + + +class FakeRetriever(BaseRetriever): + """A fake retriever for testing.""" + + docs: List[Document] = [] + + def _get_relevant_documents( + self, + query: str, + *, + run_manager: CallbackManagerForRetrieverRun, + ) -> List[Document]: + return self.docs or [ + Document( + page_content=f"Result for: {query}", + metadata={"source": "test"}, + ) + ] + + +class FakeErrorRetriever(BaseRetriever): + """A fake retriever that always fails.""" + + def _get_relevant_documents( + self, + query: str, + *, + run_manager: CallbackManagerForRetrieverRun, + ) -> List[Document]: + raise ValueError("retriever failure") + + +def _find_retriever_spans(span_exporter): + spans = span_exporter.get_finished_spans() + return [s for s in spans if "retrieve" in s.name.lower()] + + +class TestRetrieverSpanCreation: + def test_retriever_creates_span(self, instrument, span_exporter): + retriever = FakeRetriever() + docs = retriever.invoke("test query") + assert len(docs) >= 1 + + retriever_spans = _find_retriever_spans(span_exporter) + assert len(retriever_spans) >= 1 + + def test_retriever_error_span(self, instrument, span_exporter): + retriever = FakeErrorRetriever() + with pytest.raises(ValueError, match="retriever failure"): + retriever.invoke("fail query") + + spans = span_exporter.get_finished_spans() + error_spans = [ + s for s in spans if s.status.status_code == StatusCode.ERROR + ] + assert len(error_spans) >= 1 + + +class TestRetrieverInputOutputContent: + """Verify retriever query and documents in span attributes.""" + + def 
test_retrieval_query_captured(self, instrument, span_exporter): + retriever = FakeRetriever() + retriever.invoke("machine learning basics") + + retriever_spans = _find_retriever_spans(span_exporter) + assert len(retriever_spans) >= 1 + attrs = dict(retriever_spans[0].attributes) + + query_val = attrs.get(GEN_AI_RETRIEVAL_QUERY, "") + assert "machine learning basics" in query_val, ( + f"Expected 'machine learning basics' in retrieval.query, got: {query_val}" + ) + + def test_retrieval_documents_captured(self, instrument, span_exporter): + retriever = FakeRetriever() + retriever.invoke("test docs query") + + retriever_spans = _find_retriever_spans(span_exporter) + assert len(retriever_spans) >= 1 + attrs = dict(retriever_spans[0].attributes) + + docs_val = attrs.get(GEN_AI_RETRIEVAL_DOCUMENTS, "") + assert "Result for: test docs query" in docs_val, ( + f"Expected document content in retrieval.documents, got: {docs_val}" + ) + + def test_no_content_when_disabled( + self, instrument_no_content, span_exporter + ): + """When content capture is disabled, query and documents should NOT appear.""" + retriever = FakeRetriever() + retriever.invoke("secret query") + + retriever_spans = _find_retriever_spans(span_exporter) + assert len(retriever_spans) >= 1 + attrs = dict(retriever_spans[0].attributes) + + assert GEN_AI_RETRIEVAL_QUERY not in attrs, ( + "Retrieval query should NOT be captured when content capture is disabled" + ) + assert GEN_AI_RETRIEVAL_DOCUMENTS not in attrs, ( + "Retrieval documents should NOT be captured when content capture is disabled" + ) diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-langchain/tests/test_tool_spans.py b/instrumentation-loongsuite/loongsuite-instrumentation-langchain/tests/test_tool_spans.py new file mode 100644 index 000000000..5c95f9db6 --- /dev/null +++ b/instrumentation-loongsuite/loongsuite-instrumentation-langchain/tests/test_tool_spans.py @@ -0,0 +1,134 @@ +# Copyright The OpenTelemetry Authors +# +# Licensed 
under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for Tool span creation and attributes.""" + +import pytest +from langchain_core.tools import tool + +from opentelemetry.instrumentation.langchain.internal.semconv import ( + GEN_AI_TOOL_CALL_ARGUMENTS, + GEN_AI_TOOL_CALL_RESULT, +) +from opentelemetry.semconv._incubating.attributes import ( + gen_ai_attributes as GenAIAttributes, +) +from opentelemetry.trace import StatusCode + + +@tool +def add_numbers(a: int, b: int) -> int: + """Add two numbers together.""" + return a + b + + +@tool +def echo_tool(text: str) -> str: + """Echo the input text back.""" + return f"echo: {text}" + + +@tool +def failing_tool(x: str) -> str: + """A tool that always fails.""" + raise ValueError("tool failure") + + +def _find_tool_spans(span_exporter): + spans = span_exporter.get_finished_spans() + return [s for s in spans if "execute_tool" in s.name.lower()] + + +class TestToolSpanCreation: + def test_tool_creates_span(self, instrument, span_exporter): + result = add_numbers.invoke({"a": 1, "b": 2}) + assert result == 3 + + tool_spans = _find_tool_spans(span_exporter) + assert len(tool_spans) >= 1 + + def test_tool_span_has_name(self, instrument, span_exporter): + add_numbers.invoke({"a": 3, "b": 4}) + + tool_spans = _find_tool_spans(span_exporter) + assert len(tool_spans) >= 1 + assert "add_numbers" in tool_spans[0].name + + def test_tool_span_operation_name(self, instrument, span_exporter): + add_numbers.invoke({"a": 1, "b": 1}) + + tool_spans 
= _find_tool_spans(span_exporter) + assert len(tool_spans) >= 1 + attrs = dict(tool_spans[0].attributes) + assert ( + attrs.get(GenAIAttributes.GEN_AI_OPERATION_NAME) == "execute_tool" + ) + + def test_tool_error_span(self, instrument, span_exporter): + with pytest.raises(Exception): + failing_tool.invoke({"x": "fail"}) + + spans = span_exporter.get_finished_spans() + error_spans = [ + s for s in spans if s.status.status_code == StatusCode.ERROR + ] + assert len(error_spans) >= 1 + + +class TestToolInputOutputContent: + """Verify tool input arguments and output result in span attributes.""" + + def test_tool_call_arguments_captured(self, instrument, span_exporter): + echo_tool.invoke({"text": "hello_tool"}) + + tool_spans = _find_tool_spans(span_exporter) + assert len(tool_spans) >= 1 + attrs = dict(tool_spans[0].attributes) + + tool_args = attrs.get(GEN_AI_TOOL_CALL_ARGUMENTS, "") + assert "hello_tool" in tool_args, ( + f"Expected 'hello_tool' in tool.call.arguments, got: {tool_args}" + ) + + def test_tool_call_result_captured(self, instrument, span_exporter): + echo_tool.invoke({"text": "world"}) + + tool_spans = _find_tool_spans(span_exporter) + assert len(tool_spans) >= 1 + attrs = dict(tool_spans[0].attributes) + + tool_result = attrs.get(GEN_AI_TOOL_CALL_RESULT, "") + assert "echo: world" in tool_result, ( + f"Expected 'echo: world' in tool.call.result, got: {tool_result}" + ) + + def test_no_content_when_disabled( + self, instrument_no_content, span_exporter + ): + """When content capture is disabled, tool arguments/result should NOT appear.""" + echo_tool.invoke({"text": "secret"}) + + tool_spans = _find_tool_spans(span_exporter) + assert len(tool_spans) >= 1 + attrs = dict(tool_spans[0].attributes) + + assert GEN_AI_TOOL_CALL_ARGUMENTS not in attrs, ( + "Tool arguments should NOT be captured when content capture is disabled" + ) + assert GEN_AI_TOOL_CALL_RESULT not in attrs, ( + "Tool result should NOT be captured when content capture is disabled" + ) + 
assert ( + attrs.get(GenAIAttributes.GEN_AI_OPERATION_NAME) == "execute_tool" + ) diff --git a/tox-loongsuite.ini b/tox-loongsuite.ini index df6630a13..57ad02bcc 100644 --- a/tox-loongsuite.ini +++ b/tox-loongsuite.ini @@ -36,9 +36,9 @@ envlist = ; py3{9,10,11,12,13}-test-loongsuite-instrumentation-dify ; lint-loongsuite-instrumentation-dify - ; ; loongsuite-instrumentation-langchain - ; py3{9,10,11,12,13}-test-loongsuite-instrumentation-langchain - ; lint-loongsuite-instrumentation-langchain + ; loongsuite-instrumentation-langchain + py3{9,10,11,12,13}-test-loongsuite-instrumentation-langchain-{oldest,latest} + lint-loongsuite-instrumentation-langchain ; ; loongsuite-instrumentation-mcp ; py3{9,10,11,12,13}-test-loongsuite-instrumentation-mcp @@ -100,8 +100,10 @@ deps = loongsuite-dify: {[testenv]test_deps} loongsuite-dify: -r {toxinidir}/instrumentation-loongsuite/loongsuite-instrumentation-dify/test-requirements.txt - loongsuite-langchain: {[testenv]test_deps} - loongsuite-langchain: -r {toxinidir}/instrumentation-loongsuite/loongsuite-instrumentation-langchain/test-requirements.txt + langchain-oldest: -r {toxinidir}/instrumentation-loongsuite/loongsuite-instrumentation-langchain/tests/requirements.oldest.txt + langchain-latest: {[testenv]test_deps} + langchain-latest: -r {toxinidir}/instrumentation-loongsuite/loongsuite-instrumentation-langchain/tests/requirements.latest.txt + lint-loongsuite-instrumentation-langchain: -r {toxinidir}/instrumentation-loongsuite/loongsuite-instrumentation-langchain/tests/requirements.oldest.txt loongsuite-mcp: {[testenv]test_deps} loongsuite-mcp: -r {toxinidir}/instrumentation-loongsuite/loongsuite-instrumentation-mcp/test-requirements.txt diff --git a/util/opentelemetry-util-genai/CHANGELOG-loongsuite.md b/util/opentelemetry-util-genai/CHANGELOG-loongsuite.md index d60d4303a..7aa9a2f30 100644 --- a/util/opentelemetry-util-genai/CHANGELOG-loongsuite.md +++ b/util/opentelemetry-util-genai/CHANGELOG-loongsuite.md @@ -23,6 +23,8 @@ 
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed +- Fix `gen_ai.retrieval.query` to respect content capturing mode: when `OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT` is `NO_CONTENT`, both query and documents are now omitted from retrieve spans (previously only documents were gated) + ([#139](https://github.com/alibaba/loongsuite-python-agent/pull/139)) - Fix `_safe_detach` to use `_RUNTIME_CONTEXT.detach` directly, avoiding noisy `ERROR` log from OTel SDK's `context_api.detach` wrapper ([#135](https://github.com/alibaba/loongsuite-python-agent/pull/135)) - Fix undefined `otel_context` reference in `_multimodal_processing.py` `process_multimodal_fail`, replaced with `_safe_detach` diff --git a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/extended_span_utils.py b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/extended_span_utils.py index 484d6bfc7..95ee4c20d 100644 --- a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/extended_span_utils.py +++ b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/extended_span_utils.py @@ -581,8 +581,15 @@ def _apply_retrieve_finish_attributes( # LoongSuite Extension: span kind attributes[GEN_AI_SPAN_KIND] = GenAiSpanKindValues.RETRIEVER.value - # Recommended attributes - if invocation.query is not None: + # Recommended attributes (query is sensitive - controlled by content capturing mode) + if invocation.query is not None and ( + is_experimental_mode() + and get_content_capturing_mode() + in ( + ContentCapturingMode.SPAN_ONLY, + ContentCapturingMode.SPAN_AND_EVENT, + ) + ): attributes[GEN_AI_RETRIEVAL_QUERY] = invocation.query if invocation.server_address is not None: attributes[ServerAttributes.SERVER_ADDRESS] = invocation.server_address diff --git a/util/opentelemetry-util-genai/tests/test_extended_handler.py b/util/opentelemetry-util-genai/tests/test_extended_handler.py index f95a7af7d..c04e381fd 100644 --- 
a/util/opentelemetry-util-genai/tests/test_extended_handler.py +++ b/util/opentelemetry-util-genai/tests/test_extended_handler.py @@ -844,6 +844,10 @@ def test_invoke_agent_does_not_emit_event_when_disabled(self): # ==================== Retrieve Documents Tests ==================== + @patch_env_vars( + stability_mode="gen_ai_latest_experimental", + content_capturing="SPAN_ONLY", + ) def test_retrieve_start_and_stop_creates_span(self): with self.telemetry_handler.retrieve() as invocation: invocation.query = "Who is John's father?" @@ -898,6 +902,34 @@ def test_retrieve_without_sensitive_data(self): # Documents should not be present without opt-in self.assertNotIn(GEN_AI_RETRIEVAL_DOCUMENTS, span_attrs) + @patch_env_vars( + stability_mode="gen_ai_latest_experimental", + content_capturing="NO_CONTENT", + ) + def test_retrieve_no_content_when_disabled(self): + """When content capture is NO_CONTENT, query and documents should NOT appear.""" + documents = [{"id": "123", "content": "sensitive doc"}] + with self.telemetry_handler.retrieve() as invocation: + invocation.query = "secret query" + invocation.documents = documents + + span = _get_single_span(self.span_exporter) + span_attrs = _get_span_attributes(span) + self.assertNotIn( + GEN_AI_RETRIEVAL_QUERY, + span_attrs, + "Retrieval query should NOT be captured when content capture is disabled", + ) + self.assertNotIn( + GEN_AI_RETRIEVAL_DOCUMENTS, + span_attrs, + "Retrieval documents should NOT be captured when content capture is disabled", + ) + + @patch_env_vars( + stability_mode="gen_ai_latest_experimental", + content_capturing="SPAN_ONLY", + ) def test_retrieve_manual_start_and_stop(self): invocation = RetrieveInvocation() invocation.query = "manual query"