alibaba · Cirilla-zmh · Mar 11, 2026 · Mar 11, 2026 · Mar 11, 2026 · Mar 11, 2026
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-langchain/README.md b/instrumentation-loongsuite/loongsuite-instrumentation-langchain/README.md
@@ -129,7 +129,7 @@ loongsuite-instrument <your_run_command>
 | Agent | `AGENT` | `gen_ai.operation.name=invoke_agent` |
 | ReAct Step | `STEP` | `gen_ai.operation.name=react`, `gen_ai.react.round`, `gen_ai.react.finish_reason` |
 | Tool | `TOOL` | `gen_ai.operation.name=execute_tool` |
-| Retriever | `RETRIEVER` | `gen_ai.operation.name=retrieve_documents` |
+| Retriever | `RETRIEVER` | `gen_ai.operation.name=retrieval` |
 
 ReAct Step spans are created for each Reasoning-Acting iteration, with the hierarchy: Agent > ReAct Step > LLM/Tool. Supported agent types:
 

diff --git a/...instrumentation-langchain/src/opentelemetry/instrumentation/langchain/internal/_tracer.py b/...instrumentation-langchain/src/opentelemetry/instrumentation/langchain/internal/_tracer.py
@@ -33,7 +33,7 @@
 * **Chain (Agent)**     → ``handler.start_invoke_agent`` / …
 * **Chain (generic)**   → direct span creation (no ``util-genai``)
 * **Tool**              → ``handler.start_execute_tool`` / …
-* **Retriever**         → ``handler.start_retrieve`` / …
+* **Retriever**         → ``handler.start_retrieval`` / …
 """
 
 from __future__ import annotations
@@ -52,6 +52,7 @@
 from opentelemetry.context import Context
 from opentelemetry.instrumentation.langchain.internal._utils import (
     LANGGRAPH_REACT_STEP_NODE,
+    _documents_to_retrieval_documents,
     _extract_finish_reasons,
     _extract_invocation_params,
     _extract_llm_input_messages,
@@ -84,7 +85,7 @@
 from opentelemetry.util.genai.extended_types import (
     ExecuteToolInvocation,
     InvokeAgentInvocation,
-    RetrieveInvocation,
+    RetrievalInvocation,
 )
 from opentelemetry.util.genai.handler import _safe_detach
 from opentelemetry.util.genai.types import (
@@ -626,8 +627,8 @@ def _on_retriever_start(self, run: Run) -> None:
             inputs = getattr(run, "inputs", None) or {}
             query = inputs.get("query") or ""
 
-            invocation = RetrieveInvocation(query=query)
-            self._handler.start_retrieve(invocation, context=parent_ctx)
+            invocation = RetrievalInvocation(query=query)
+            self._handler.start_retrieval(invocation, context=parent_ctx)
             rd = _RunData(
                 run_kind="retriever",
                 span=invocation.span,
@@ -647,12 +648,12 @@ def _on_retriever_end(self, run: Run) -> None:
         if rd is None or rd.run_kind != "retriever":
             return
         try:
-            inv: RetrieveInvocation = rd.invocation
+            inv: RetrievalInvocation = rd.invocation
             outputs = getattr(run, "outputs", None) or {}
             documents = outputs.get("documents") or []
             if documents:
-                inv.documents = _safe_json(documents)
-            self._handler.stop_retrieve(inv)
+                inv.documents = _documents_to_retrieval_documents(documents)
+            self._handler.stop_retrieval(inv)
         except Exception:
             logger.debug("Failed to stop Retriever span", exc_info=True)
 
@@ -663,7 +664,7 @@ def _on_retriever_error(self, run: Run) -> None:
             return
         try:
             err_str = getattr(run, "error", None) or "Unknown error"
-            self._handler.fail_retrieve(
+            self._handler.fail_retrieval(
                 rd.invocation,
                 Error(message=str(err_str), type=Exception),
             )

diff --git a/...-instrumentation-langchain/src/opentelemetry/instrumentation/langchain/internal/_utils.py b/...-instrumentation-langchain/src/opentelemetry/instrumentation/langchain/internal/_utils.py
@@ -22,6 +22,7 @@
     FunctionToolDefinition,
     InputMessage,
     OutputMessage,
+    RetrievalDocument,
     Text,
     ToolCall,
     ToolCallResponse,
@@ -405,6 +406,49 @@ def _extract_response_model(run: Any) -> str | None:
     return llm_output.get("model_name") or llm_output.get("model")
 
 
+# ---------------------------------------------------------------------------
+# Retriever document conversion
+# ---------------------------------------------------------------------------
+
+
+def _documents_to_retrieval_documents(documents: Any) -> list:
+    """Convert retriever output documents to List[RetrievalDocument].
+
+    Accepts LangChain Document objects (page_content, metadata) or similar.
+    Extracts id from doc.id, metadata.id, metadata.doc_id, metadata.document_id.
+    Extracts score from metadata.score, metadata.relevance_score, metadata.similarity_score.
+    The first candidate that is not None wins, so a 0.0 score or a 0 id is kept;
+    a plain ``or`` chain would discard those falsy-but-valid values.
+    Metadata that is missing or has no ``get`` method is treated as empty.
+    Returns an empty list when ``documents`` is None or empty.
+    """
+
+    id_keys = ("id", "doc_id", "document_id")
+    score_keys = ("score", "relevance_score", "similarity_score")
+
+    def _first(*values: Any) -> Any:
+        # Return the first value that is not None (None when all are).
+        return next((v for v in values if v is not None), None)
+
+    result = []
+    for doc in documents or []:
+        meta = getattr(doc, "metadata", None)
+        if not hasattr(meta, "get"):
+            meta = {}  # absent or non-mapping metadata counts as empty
+        content = getattr(doc, "page_content", None) or getattr(
+            doc, "content", None
+        )
+        result.append(
+            RetrievalDocument(
+                id=_first(getattr(doc, "id", None), *map(meta.get, id_keys)),
+                score=_first(*map(meta.get, score_keys)),
+                content=content,
+                metadata=meta if meta else None,
+            )
+        )
+    return result
+
+
 # ---------------------------------------------------------------------------
 # JSON serialisation helper
 # ---------------------------------------------------------------------------

diff --git a/...instrumentation-langchain/src/opentelemetry/instrumentation/langchain/internal/semconv.py b/...instrumentation-langchain/src/opentelemetry/instrumentation/langchain/internal/semconv.py
@@ -24,7 +24,7 @@
 )
 from opentelemetry.util.genai._extended_semconv.gen_ai_extended_attributes import (  # noqa: E501
     GEN_AI_RETRIEVAL_DOCUMENTS,
-    GEN_AI_RETRIEVAL_QUERY,
+    GEN_AI_RETRIEVAL_QUERY_TEXT,
     GEN_AI_SPAN_KIND,
     GEN_AI_TOOL_CALL_ARGUMENTS,
     GEN_AI_TOOL_CALL_RESULT,
@@ -38,7 +38,7 @@
     "GEN_AI_OPERATION_NAME",
     "GEN_AI_TOOL_CALL_ID",
     "GEN_AI_RETRIEVAL_DOCUMENTS",
-    "GEN_AI_RETRIEVAL_QUERY",
+    "GEN_AI_RETRIEVAL_QUERY_TEXT",
     "GEN_AI_SPAN_KIND",
     "GEN_AI_TOOL_CALL_ARGUMENTS",
     "GEN_AI_TOOL_CALL_RESULT",

diff --git a/...tion-loongsuite/loongsuite-instrumentation-langchain/tests/test_langchain_instrumentor.py b/...tion-loongsuite/loongsuite-instrumentation-langchain/tests/test_langchain_instrumentor.py
@@ -41,7 +41,7 @@
 from opentelemetry.instrumentation.langchain.internal.semconv import (
     GEN_AI_OPERATION_NAME,
     GEN_AI_RETRIEVAL_DOCUMENTS,
-    GEN_AI_RETRIEVAL_QUERY,
+    GEN_AI_RETRIEVAL_QUERY_TEXT,
     GEN_AI_SPAN_KIND,
     INPUT_VALUE,
     OUTPUT_VALUE,
@@ -250,14 +250,21 @@ def test_retrieval_qa_chain_spans(
         assert sd_span.status.status_code == StatusCode.ERROR
     assert not sd_attrs or set(sd_attrs.keys()) <= {"metadata"}
 
-    # Retriever span: name is "retrieve_documents"
-    retriever_span = spans_by_name.get("retrieve_documents")
+    # Retriever span: name is "retrieval" (or "retrieval {data_source_id}" when set)
+    retriever_span = spans_by_name.get("retrieval") or next(
+        (
+            s
+            for s in span_exporter.get_finished_spans()
+            if s.name.startswith("retrieval")
+        ),
+        None,
+    )
     assert retriever_span is not None
     assert retriever_span.parent is not None
     assert retriever_span.parent.span_id == rqa_span.context.span_id
     retriever_attrs = dict(retriever_span.attributes or {})
     assert retriever_attrs.pop(GEN_AI_SPAN_KIND, None) == "RETRIEVER"
-    assert retriever_attrs.pop(GEN_AI_RETRIEVAL_QUERY, None) == question
+    assert retriever_attrs.pop(GEN_AI_RETRIEVAL_QUERY_TEXT, None) == question
     docs_val = retriever_attrs.pop(GEN_AI_RETRIEVAL_DOCUMENTS, None)
     assert docs_val is not None
     for text in documents:

diff --git a/...rumentation-loongsuite/loongsuite-instrumentation-langchain/tests/test_retriever_spans.py b/...rumentation-loongsuite/loongsuite-instrumentation-langchain/tests/test_retriever_spans.py
@@ -23,7 +23,7 @@
 
 from opentelemetry.instrumentation.langchain.internal.semconv import (
     GEN_AI_RETRIEVAL_DOCUMENTS,
-    GEN_AI_RETRIEVAL_QUERY,
+    GEN_AI_RETRIEVAL_QUERY_TEXT,
 )
 from opentelemetry.trace import StatusCode
 
@@ -61,7 +61,7 @@ def _get_relevant_documents(
 
 def _find_retriever_spans(span_exporter):
     spans = span_exporter.get_finished_spans()
-    return [s for s in spans if "retrieve" in s.name.lower()]
+    return [s for s in spans if "retrieval" in s.name.lower()]
 
 
 class TestRetrieverSpanCreation:
@@ -96,9 +96,9 @@ def test_retrieval_query_captured(self, instrument, span_exporter):
         assert len(retriever_spans) >= 1
         attrs = dict(retriever_spans[0].attributes)
 
-        query_val = attrs.get(GEN_AI_RETRIEVAL_QUERY, "")
+        query_val = attrs.get(GEN_AI_RETRIEVAL_QUERY_TEXT, "")
         assert "machine learning basics" in query_val, (
-            f"Expected 'machine learning basics' in retrieval.query, got: {query_val}"
+            f"Expected 'machine learning basics' in retrieval.query.text, got: {query_val}"
         )
 
     def test_retrieval_documents_captured(self, instrument, span_exporter):
@@ -117,17 +117,19 @@ def test_retrieval_documents_captured(self, instrument, span_exporter):
     def test_no_content_when_disabled(
         self, instrument_no_content, span_exporter
     ):
-        """When content capture is disabled, query and documents should NOT appear."""
+        """When content capture is NO_CONTENT: query omitted; documents record id and score only."""
         retriever = FakeRetriever()
         retriever.invoke("secret query")
 
         retriever_spans = _find_retriever_spans(span_exporter)
         assert len(retriever_spans) >= 1
         attrs = dict(retriever_spans[0].attributes)
 
-        assert GEN_AI_RETRIEVAL_QUERY not in attrs, (
-            "Retrieval query should NOT be captured when content capture is disabled"
+        assert GEN_AI_RETRIEVAL_QUERY_TEXT not in attrs, (
+            "Query should NOT be captured when content capture is disabled"
         )
-        assert GEN_AI_RETRIEVAL_DOCUMENTS not in attrs, (
-            "Retrieval documents should NOT be captured when content capture is disabled"
+        # Documents are recorded with id and score only (no content) when NO_CONTENT
+        docs_val = attrs.get(GEN_AI_RETRIEVAL_DOCUMENTS, "")
+        assert "secret query" not in docs_val, (
+            "Document content should NOT be captured when NO_CONTENT"
         )
diff --git a/util/opentelemetry-util-genai/CHANGELOG-loongsuite.md b/util/opentelemetry-util-genai/CHANGELOG-loongsuite.md
@@ -9,13 +9,23 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Added
 
+- Add `RetrievalDocument` dataclass for typed retrieval document representation (id, score, content, metadata)
+  ([#143](https://github.com/alibaba/loongsuite-python-agent/pull/143))
+- Control `RetrievalDocument` serialization: when content capturing is `NO_CONTENT`, only id and score are serialized; with `SPAN_ONLY`/`SPAN_AND_EVENT`, the full document (id, score, content, metadata) is serialized
+  ([#143](https://github.com/alibaba/loongsuite-python-agent/pull/143))
 - Add Entry span (`gen_ai.span.kind=ENTRY`) and ReAct Step span (`gen_ai.span.kind=STEP`) support in `ExtendedTelemetryHandler` with types, utilities, and context-manager APIs
   ([#135](https://github.com/alibaba/loongsuite-python-agent/pull/135))
 - Propagate `gen_ai.session.id` and `gen_ai.user.id` into Baggage during `start_entry`, enabling traffic coloring via `BaggageSpanProcessor` for all child spans within the entry block
   ([#135](https://github.com/alibaba/loongsuite-python-agent/pull/135))
 
 ### Changed
 
+- **Retrieval semantic convention**: Align retrieval spans with LoongSuite spec
+  ([#143](https://github.com/alibaba/loongsuite-python-agent/pull/143))
+  - `gen_ai.operation.name`: `retrieve_documents` → `retrieval`
+  - `gen_ai.retrieval.query` → `gen_ai.retrieval.query.text` for query text
+  - Span name: `retrieval {gen_ai.data_source.id}` when `data_source_id` is set
+  - Add `RetrievalInvocation` fields: `data_source_id`, `provider`, `request_model`, `top_k`
 - Add optional `context` parameter to all `start_*` methods in `TelemetryHandler` and `ExtendedTelemetryHandler` for explicit parent-child span linking
   ([#135](https://github.com/alibaba/loongsuite-python-agent/pull/135))
 - Unify `attach`/`detach` strategy in `ExtendedTelemetryHandler`: always `attach` regardless of whether `context` is provided; `stop_*`/`fail_*` guards restored to `context_token is None or span is None`

diff --git a/util/opentelemetry-util-genai/README-loongsuite.rst b/util/opentelemetry-util-genai/README-loongsuite.rst
@@ -13,7 +13,7 @@ LoongSuite 扩展为 OpenTelemetry GenAI Util 包提供了额外的 Generative A
 - **create_agent**: Agent 创建操作
 - **embedding**: 向量嵌入生成操作
 - **execute_tool**: 工具执行操作
-- **retrieve**: 文档检索操作（向量数据库查询）
+- **retrieval**: 文档检索操作（向量数据库查询）
 - **rerank**: 文档重排序操作
 - **memory**: 记忆操作，支持记忆的增删改查等操作
 - **entry**: AI 应用系统入口标识，支持 session_id/user_id 的 Baggage 传播
@@ -352,36 +352,46 @@ Token 使用:
         invocation.tool_call_result = result
 
 
-6. 文档检索 (retrieve)
+6. 文档检索 (retrieval)
 ~~~~~~~~~~~~~~~~~~~~~~~
 
 用于跟踪从向量数据库或搜索系统检索文档的操作。
 
 **支持的属性:**
 
-- ``gen_ai.operation.name``: 操作名称，固定为 "retrieve"
-- ``gen_ai.provider.name``: 提供商名称
-- ``gen_ai.retrieval.query``: 检索查询字符串（受内容捕获模式控制）
-- ``gen_ai.retrieval.documents``: 检索到的文档（受内容捕获模式控制）
+- ``gen_ai.operation.name``: 操作名称，固定为 "retrieval"
+- ``gen_ai.span.kind``: 固定为 "RETRIEVER"
+- ``gen_ai.data_source.id``: 数据源唯一标识（有条件时必须）
+- ``gen_ai.provider.name``: 提供商名称（有条件时必须）
+- ``gen_ai.request.model``: 请求模型（有条件时必须）
+- ``gen_ai.request.top_k``: 请求 topK（推荐）
+- ``gen_ai.retrieval.query.text``: 检索内容短句（可选，受内容捕获模式控制）
+- ``gen_ai.retrieval.documents``: 召回的文档列表，格式 [{"id": str, "score": float}, ...]（可选，受内容捕获模式控制）
+
+**Span 命名:** ``retrieval {gen_ai.data_source.id}``，无 data_source_id 时为 ``retrieval``
+
+**文档格式:** 使用 ``List[RetrievalDocument]``，instrumentation 需将框架类型（如 LangChain Document）转换为 ``RetrievalDocument``。当 OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT 为 NO_CONTENT 时仅记录 id 和 score；SPAN_ONLY/SPAN_AND_EVENT 时记录完整。
 
 **使用示例:**
 
 ::
 
-    from opentelemetry.util.genai.extended_types import RetrieveInvocation
+    from opentelemetry.util.genai.extended_types import RetrievalInvocation, RetrievalDocument
 
-    with handler.retrieve() as invocation:
+    with handler.retrieval() as invocation:
         invocation.provider = "chroma"
-        invocation.retrieval_query = "什么是 OpenTelemetry?"
+        invocation.data_source_id = "H7STPQYOND"
+        invocation.query = "什么是 OpenTelemetry?"
+        invocation.top_k = 5.0
 
         # 执行检索...
-        invocation.retrieval_documents = [
-            {"id": "doc1", "content": "OpenTelemetry 是一个观测性框架...", "score": 0.95},
-            {"id": "doc2", "content": "OpenTelemetry 提供统一的 API...", "score": 0.88}
+        invocation.documents = [
+            RetrievalDocument(id="doc1", score=0.95, content="...", metadata={}),
+            RetrievalDocument(id="doc2", score=0.88, content="...", metadata={}),
         ]
 
 
 7. 文档重排序 (rerank)
 ~~~~~~~~~~~~~~~~~~~~~~~
 
 用于跟踪文档重排序操作，支持基于模型和基于 LLM 的重排序器。
@@ -797,13 +807,18 @@ Baggage 中已有同名 key，则会被覆盖。
         tool_inv.tool_call_result = {"products": [...]}
 
     # 检索相关文档
-    with handler.retrieve() as retrieve_inv:
-        retrieve_inv.provider = "chroma"
-        retrieve_inv.retrieval_query = "笔记本电脑推荐"
+    from opentelemetry.util.genai.extended_types import RetrievalDocument
+    with handler.retrieval() as retrieval_inv:
+        retrieval_inv.provider = "chroma"
+        retrieval_inv.data_source_id = "my_vector_store"
+        retrieval_inv.query = "笔记本电脑推荐"
 
         # 执行检索...
 
-        retrieve_inv.retrieval_documents = [...]
+        retrieval_inv.documents = [
+            RetrievalDocument(id="doc1", score=0.95, content="...", metadata={}),
+            RetrievalDocument(id="doc2", score=0.88, content="...", metadata={}),
+        ]
 
     # 重排序结果
     with handler.rerank() as rerank_inv:

diff --git a/...y-util-genai/src/opentelemetry/util/genai/_extended_semconv/gen_ai_extended_attributes.py b/...y-util-genai/src/opentelemetry/util/genai/_extended_semconv/gen_ai_extended_attributes.py
@@ -43,10 +43,10 @@
 The result returned by the tool after execution.
 """
 
-# Retrieve attributes
-GEN_AI_RETRIEVAL_QUERY: Final = "gen_ai.retrieval.query"
+# Retrieval attributes
+GEN_AI_RETRIEVAL_QUERY_TEXT: Final = "gen_ai.retrieval.query.text"
 """
-The query string used to retrieve documents from a vector database or search system.
+The retrieval query text (short phrase). Per LoongSuite semantic convention.
 """
 
 GEN_AI_RETRIEVAL_DOCUMENTS: Final = "gen_ai.retrieval.documents"
@@ -191,8 +191,8 @@ class GenAiSpanKindValues(Enum):
 
 
 class GenAiExtendedOperationNameValues(Enum):
-    RETRIEVE_DOCUMENTS = "retrieve_documents"
-    """Retrieve documents operation."""
+    RETRIEVAL = "retrieval"
+    """Retrieval operation (vector store / database lookup). Per LoongSuite semantic convention."""
 
     RERANK_DOCUMENTS = "rerank_documents"
     """Rerank documents operation."""