Skip to content

Commit c17e063

Browse files
github-actions[bot]albertjanYun-Kim
authored
fix(langchain): handle secret str api keys [backport 2.0] (#7479)
Backport 6dc61f5 from #7430 to 2.0. Currently, the Anthropic chain implementation in langchain uses a pydantic SecretStr as its API key, which causes errors in our pipeline when ddtrace tries to format the API key. With langchain-ai/langchain#12542, the OpenAI implementation will also start using a SecretStr, and I'm sure at that point a few more people will be asking why things are broken. I'm struggling to set up and run the tests: riot doesn't print anything, and I have no experience with the cassette-based testing methods. Can someone help with this? I think that if we add a test that uses the Anthropic LLM, we will see the failure without this change and see that this change fixes it. I've updated the type comment on the function, but the env doesn't know about Pydantic, so I don't know if this is a valid thing to do. ## Checklist - [X] Change(s) are motivated and described in the PR description. - [x] Testing strategy is described if automated tests are not included in the PR. - [X] Risk is outlined (performance impact, potential for breakage, maintainability, etc). - [X] Change is maintainable (easy to change, telemetry, documentation). - [X] [Library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) are followed. If no release note is required, add label `changelog/no-changelog`. - [X] Documentation is included (in-code, generated user docs, [public corp docs](https://github.com/DataDog/documentation/)). - [x] Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) ## Reviewer Checklist - [x] Title is accurate. - [x] No unnecessary changes are introduced. - [x] Description motivates each change. - [x] Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes unless absolutely necessary. - [x] Testing strategy adequately addresses listed risk(s). - [x] Change is maintainable (easy to change, telemetry, documentation). 
- [x] Release note makes sense to a user of the library. - [x] Reviewer has explicitly acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment. - [x] Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) - [x] If this PR touches code that signs or publishes builds or packages, or handles credentials of any kind, I've requested a review from `@DataDog/security-design-and-guidance`. - [x] This PR doesn't touch any of that. --------- Co-authored-by: Albert-Jan Nijburg <[email protected]> Co-authored-by: Yun Kim <[email protected]> Co-authored-by: Yun Kim <[email protected]>
1 parent da16e3d commit c17e063

File tree

4 files changed

+19
-17
lines changed

4 files changed

+19
-17
lines changed

ddtrace/contrib/langchain/patch.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
import langchain
99
from langchain.callbacks.openai_info import get_openai_token_cost_for_model
10+
from pydantic import SecretStr
1011

1112
from ddtrace import config
1213
from ddtrace.constants import ERROR_TYPE
@@ -140,8 +141,11 @@ def _extract_model_name(instance):
140141

141142

142143
def _format_api_key(api_key):
143-
# type: (str) -> str
144+
# type: (str | SecretStr) -> str
144145
"""Obfuscate a given LLM provider API key by returning the last four characters."""
146+
if hasattr(api_key, "get_secret_value"):
147+
api_key = api_key.get_secret_value()
148+
145149
if not api_key or len(api_key) < 4:
146150
return ""
147151
return "...%s" % api_key[-4:]
@@ -695,7 +699,7 @@ def traced_similarity_search(langchain, pin, func, instance, args, kwargs):
695699
instance._index.configuration.server_variables.get("project_name", ""),
696700
)
697701
api_key = instance._index.configuration.api_key.get("ApiKeyAuth", "")
698-
span.set_tag_str(API_KEY, "...%s" % api_key[-4:]) # override api_key for Pinecone
702+
span.set_tag_str(API_KEY, _format_api_key(api_key)) # override api_key for Pinecone
699703
documents = func(*args, **kwargs)
700704
span.set_metric("langchain.response.document_count", len(documents))
701705
for idx, document in enumerate(documents):

docs/spelling_wordlist.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,7 @@ proxying
175175
psutil
176176
psycopg
177177
py
178+
pydantic
178179
pyenv
179180
PyFrameObject
180181
pylibmc
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
---
2+
fixes:
3+
- |
4+
langchain: This fix resolves an issue with tagging API keys that are of the pydantic `SecretStr` type.

tests/contrib/langchain/test_langchain.py

Lines changed: 8 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,9 @@ def langchain(ddtrace_config_langchain, mock_logs, mock_metrics):
5353
with override_config("langchain", ddtrace_config_langchain):
5454
# ensure that mock OpenAI API key is passed in
5555
os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY", "<not-a-real-key>")
56+
os.environ["COHERE_API_KEY"] = os.getenv("COHERE_API_KEY", "<not-a-real-key>")
57+
os.environ["HUGGINGFACEHUB_API_TOKEN"] = os.getenv("HUGGINGFACEHUB_API_TOKEN", "<not-a-real-key>")
58+
os.environ["AI21_API_KEY"] = os.getenv("AI21_API_KEY", "<not-a-real-key>")
5659
patch()
5760
import langchain
5861

@@ -1078,9 +1081,7 @@ def test_pinecone_vectorstore_similarity_search(langchain, request_vcr):
10781081
api_key=os.getenv("PINECONE_API_KEY", "<not-a-real-key>"),
10791082
environment=os.getenv("PINECONE_ENV", "<not-a-real-env>"),
10801083
)
1081-
embed = langchain.embeddings.OpenAIEmbeddings(
1082-
model="text-embedding-ada-002", openai_api_key=os.getenv("OPENAI_API_KEY", "<not-a-real-key>")
1083-
)
1084+
embed = langchain.embeddings.OpenAIEmbeddings(model="text-embedding-ada-002")
10841085
index = pinecone.Index(index_name="langchain-retrieval")
10851086
vectorstore = langchain.vectorstores.Pinecone(index, embed.embed_query, "text")
10861087
vectorstore.similarity_search("Who was Alan Turing?", 1)
@@ -1100,9 +1101,7 @@ def test_pinecone_vectorstore_retrieval_chain(langchain, request_vcr):
11001101
api_key=os.getenv("PINECONE_API_KEY", "<not-a-real-key>"),
11011102
environment=os.getenv("PINECONE_ENV", "<not-a-real-env>"),
11021103
)
1103-
embed = langchain.embeddings.OpenAIEmbeddings(
1104-
model="text-embedding-ada-002", openai_api_key=os.getenv("OPENAI_API_KEY", "<not-a-real-key>")
1105-
)
1104+
embed = langchain.embeddings.OpenAIEmbeddings(model="text-embedding-ada-002")
11061105
index = pinecone.Index(index_name="langchain-retrieval")
11071106
vectorstore = langchain.vectorstores.Pinecone(index, embed.embed_query, "text")
11081107

@@ -1127,9 +1126,7 @@ def test_pinecone_vectorstore_retrieval_chain_39(langchain, request_vcr):
11271126
api_key=os.getenv("PINECONE_API_KEY", "<not-a-real-key>"),
11281127
environment=os.getenv("PINECONE_ENV", "<not-a-real-env>"),
11291128
)
1130-
embed = langchain.embeddings.OpenAIEmbeddings(
1131-
model="text-embedding-ada-002", openai_api_key=os.getenv("OPENAI_API_KEY", "<not-a-real-key>")
1132-
)
1129+
embed = langchain.embeddings.OpenAIEmbeddings(model="text-embedding-ada-002")
11331130
index = pinecone.Index(index_name="langchain-retrieval")
11341131
vectorstore = langchain.vectorstores.Pinecone(index, embed.embed_query, "text")
11351132

@@ -1152,9 +1149,7 @@ def test_vectorstore_similarity_search_metrics(langchain, request_vcr, mock_metr
11521149
api_key=os.getenv("PINECONE_API_KEY", "<not-a-real-key>"),
11531150
environment=os.getenv("PINECONE_ENV", "<not-a-real-env>"),
11541151
)
1155-
embed = langchain.embeddings.OpenAIEmbeddings(
1156-
model="text-embedding-ada-002", openai_api_key=os.getenv("OPENAI_API_KEY", "<not-a-real-key>")
1157-
)
1152+
embed = langchain.embeddings.OpenAIEmbeddings(model="text-embedding-ada-002")
11581153
index = pinecone.Index(index_name="langchain-retrieval")
11591154
vectorstore = langchain.vectorstores.Pinecone(index, embed.embed_query, "text")
11601155
vectorstore.similarity_search("Who was Alan Turing?", 1)
@@ -1205,9 +1200,7 @@ def test_vectorstore_logs(langchain, ddtrace_config_langchain, request_vcr, mock
12051200
api_key=os.getenv("PINECONE_API_KEY", "<not-a-real-key>"),
12061201
environment=os.getenv("PINECONE_ENV", "<not-a-real-env>"),
12071202
)
1208-
embed = langchain.embeddings.OpenAIEmbeddings(
1209-
model="text-embedding-ada-002", openai_api_key=os.getenv("OPENAI_API_KEY", "<not-a-real-key>")
1210-
)
1203+
embed = langchain.embeddings.OpenAIEmbeddings(model="text-embedding-ada-002")
12111204
index = pinecone.Index(index_name="langchain-retrieval")
12121205
vectorstore = langchain.vectorstores.Pinecone(index, embed.embed_query, "text")
12131206
vectorstore.similarity_search("Who was Alan Turing?", 1)

0 commit comments

Comments
 (0)