
Commit 09b4699

fix(langchain): use correct keyword argument name for chat messages [backport 1.17] (#6505)
Backport 19155cf from #6466 to 1.17. Resolves #6464.

We were not using the correct keyword argument name for chat model messages, nor for embeddings (we use the same method to trace `embed_query(text)` and `embed_documents(texts)`, and we previously only looked for `text`). This PR fixes the issue and adds test coverage for keyword argument usage to our existing test cases.

For reference, below is the current code coverage for the langchain integration test suite. It is not the ideal 100% we would like, but 93% is acceptable for now, especially considering the complexity/ambiguity of the integration and the fact that different model providers have different internal parameters.

```
Name                                 Stmts   Miss  Cover
ddtrace/contrib/langchain/patch.py     440     32    93%

Missing coverage for lines: 36, 181, 280, 301-304, 309-312, 355-356, 446-447, 467-470, 475-478, 630-633, 638-643, 677
```

## Checklist

- [x] Change(s) are motivated and described in the PR description.
- [x] Testing strategy is described if automated tests are not included in the PR.
- [x] Risk is outlined (performance impact, potential for breakage, maintainability, etc.).
- [x] Change is maintainable (easy to change, telemetry, documentation).
- [x] [Library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) are followed. If no release note is required, add label `changelog/no-changelog`.
- [x] Documentation is included (in-code, generated user docs, [public corp docs](https://github.com/DataDog/documentation/)).
- [x] Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)).

## Reviewer Checklist

- [x] Title is accurate.
- [x] No unnecessary changes are introduced.
- [x] Description motivates each change.
- [x] Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes unless absolutely necessary.
- [x] Testing strategy adequately addresses listed risk(s).
- [x] Change is maintainable (easy to change, telemetry, documentation).
- [x] Release note makes sense to a user of the library.
- [x] Reviewer has explicitly acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment.
- [x] Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting).

Co-authored-by: Yun Kim <[email protected]>
Parent commit: 2d1949d
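For readers skimming the diff below, here is a minimal, self-contained sketch of the lookup pattern the fix relies on. The `get_argument_value` and `ArgumentError` names mirror the ddtrace helpers used in the patch, but the bodies below (and `lookup_embedding_input`) are simplified stand-ins written for illustration, not the library's actual implementation:

```python
# Simplified stand-ins for ddtrace.internal.utils.get_argument_value / ArgumentError,
# shown only to illustrate the positional-or-keyword lookup the patch depends on.


class ArgumentError(Exception):
    """Raised when the argument is present neither positionally nor by keyword."""


def get_argument_value(args, kwargs, position, name):
    # Prefer the positional slot, then fall back to the keyword name.
    if len(args) > position:
        return args[position]
    if name in kwargs:
        return kwargs[name]
    raise ArgumentError(name)


def lookup_embedding_input(args, kwargs):
    # embed_documents(texts) and embed_query(text) share one tracing wrapper,
    # so try the plural keyword first and fall back to the singular one.
    try:
        return get_argument_value(args, kwargs, 0, "texts")
    except ArgumentError:
        return get_argument_value(args, kwargs, 0, "text")


# Positional and keyword call styles both resolve correctly:
assert get_argument_value((["hi"],), {}, 0, "messages") == ["hi"]
assert get_argument_value((), {"messages": ["hi"]}, 0, "messages") == ["hi"]
assert lookup_embedding_input((), {"text": "foo"}) == "foo"
assert lookup_embedding_input((), {"texts": ["foo", "bar"]}) == ["foo", "bar"]
```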

File tree: 3 files changed (+23, -9 lines)

ddtrace/contrib/langchain/patch.py

Lines changed: 14 additions & 5 deletions
```diff
@@ -25,6 +25,7 @@
 from ddtrace.contrib.trace_utils import wrap
 from ddtrace.internal.agent import get_stats_url
 from ddtrace.internal.logger import get_logger
+from ddtrace.internal.utils import ArgumentError
 from ddtrace.internal.utils import get_argument_value
 from ddtrace.internal.utils.formats import asbool
 from ddtrace.internal.utils.formats import deep_getattr
@@ -326,7 +327,7 @@ async def traced_llm_agenerate(langchain, pin, func, instance, args, kwargs):
 @with_traced_module
 def traced_chat_model_generate(langchain, pin, func, instance, args, kwargs):
     llm_provider = instance._llm_type.split("-")[0]
-    chat_messages = get_argument_value(args, kwargs, 0, "chat_messages")
+    chat_messages = get_argument_value(args, kwargs, 0, "messages")
     integration = langchain._datadog_integration
     span = integration.trace(
         pin,
@@ -417,7 +418,7 @@ def traced_chat_model_generate(langchain, pin, func, instance, args, kwargs):
 @with_traced_module
 async def traced_chat_model_agenerate(langchain, pin, func, instance, args, kwargs):
     llm_provider = instance._llm_type.split("-")[0]
-    chat_messages = get_argument_value(args, kwargs, 0, "chat_messages")
+    chat_messages = get_argument_value(args, kwargs, 0, "messages")
     integration = langchain._datadog_integration
     span = integration.trace(
         pin,
@@ -507,7 +508,15 @@ async def traced_chat_model_agenerate(langchain, pin, func, instance, args, kwargs):
 
 @with_traced_module
 def traced_embedding(langchain, pin, func, instance, args, kwargs):
-    input_texts = get_argument_value(args, kwargs, 0, "text")
+    """
+    This traces both embed_query(text) and embed_documents(texts), so we need to make sure
+    we get the right arg/kwarg.
+    """
+    try:
+        input_texts = get_argument_value(args, kwargs, 0, "texts")
+    except ArgumentError:
+        input_texts = get_argument_value(args, kwargs, 0, "text")
+
     provider = instance.__class__.__name__.split("Embeddings")[0].lower()
     integration = langchain._datadog_integration
     span = integration.trace(
@@ -559,7 +568,7 @@ def traced_chain_call(langchain, pin, func, instance, args, kwargs):
     span = integration.trace(pin, "%s.%s" % (instance.__module__, instance.__class__.__name__), interface_type="chain")
     final_outputs = {}
     try:
-        inputs = args[0]
+        inputs = get_argument_value(args, kwargs, 0, "inputs")
         if not isinstance(inputs, dict):
             inputs = {instance.input_keys[0]: inputs}
         if integration.is_pc_sampled_span(span):
@@ -605,7 +614,7 @@ async def traced_chain_acall(langchain, pin, func, instance, args, kwargs):
     span = integration.trace(pin, "%s.%s" % (instance.__module__, instance.__class__.__name__), interface_type="chain")
     final_outputs = {}
     try:
-        inputs = args[0]
+        inputs = get_argument_value(args, kwargs, 0, "inputs")
         if not isinstance(inputs, dict):
             inputs = {instance.input_keys[0]: inputs}
         if integration.is_pc_sampled_span(span):
```
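The try/except added to `traced_embedding` is needed because the two embedding entry points use different parameter names. Roughly, the call shapes the shared wrapper now has to handle look like this; the snippet is only an illustration using LangChain's `FakeEmbeddings` (as the test suite does), not part of the diff:

```python
from langchain.embeddings import FakeEmbeddings

embeddings = FakeEmbeddings(size=99)

# embed_query passes a single string via the `text` parameter...
embeddings.embed_query(text="foo")

# ...while embed_documents passes a list of strings via the `texts` parameter.
embeddings.embed_documents(texts=["foo", "bar"])
```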
New release note file — Lines changed: 5 additions & 0 deletions
```diff
@@ -0,0 +1,5 @@
+---
+fixes:
+  - |
+    langchain: This fix resolves an issue where chat messages and embedding arguments
+    passed in as keyword arguments were not parsed correctly and resulted in an ``ArgumentError``.
```

tests/contrib/langchain/test_langchain.py

Lines changed: 4 additions & 4 deletions
```diff
@@ -181,7 +181,7 @@ def test_openai_llm_sync_multiple_prompts(langchain, request_vcr):
     llm = langchain.llms.OpenAI()
     with request_vcr.use_cassette("openai_completion_sync_multi_prompt.yaml"):
         llm.generate(
-            [
+            prompts=[
                 "What is the best way to teach a baby multiple languages?",
                 "How many times has Spongebob failed his road test?",
             ]
@@ -376,7 +376,7 @@ def test_llm_logs(langchain, ddtrace_config_langchain, request_vcr, mock_logs, m
 def test_openai_chat_model_sync_call(langchain, request_vcr):
     chat = langchain.chat_models.ChatOpenAI(temperature=0, max_tokens=256)
     with request_vcr.use_cassette("openai_chat_completion_sync_call.yaml"):
-        chat([langchain.schema.HumanMessage(content="When do you use 'whom' instead of 'who'?")])
+        chat(messages=[langchain.schema.HumanMessage(content="When do you use 'whom' instead of 'who'?")])
 
 
 @pytest.mark.skipif(sys.version_info >= (3, 10, 0), reason="Python 3.9 specific test")
@@ -622,13 +622,13 @@ def test_openai_embedding_document(langchain, request_vcr):
 @pytest.mark.snapshot
 def test_fake_embedding_query(langchain):
     embeddings = langchain.embeddings.FakeEmbeddings(size=99)
-    embeddings.embed_query("foo")
+    embeddings.embed_query(text="foo")
 
 
 @pytest.mark.snapshot
 def test_fake_embedding_document(langchain):
     embeddings = langchain.embeddings.FakeEmbeddings(size=99)
-    embeddings.embed_documents(["foo", "bar"])
+    embeddings.embed_documents(texts=["foo", "bar"])
 
 
 def test_openai_embedding_metrics(langchain, request_vcr, mock_metrics, mock_logs, snapshot_tracer):
```
