Skip to content

Commit a116e8f

Browse files
authored
INTPYTHON-461 - Add additional client metadata on LangChain Integrations (#183)
1 parent f89faf2 commit a116e8f

File tree

19 files changed

+115
-57
lines changed

19 files changed

+115
-57
lines changed

libs/langchain-mongodb/langchain_mongodb/agent_toolkit/database.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
import json
66
import re
77
from datetime import date, datetime
8-
from importlib.metadata import version
98
from typing import Any, Dict, Iterable, List, Optional, Union
109

1110
from bson import ObjectId
@@ -14,9 +13,10 @@
1413
from bson.json_util import dumps
1514
from pymongo import MongoClient
1615
from pymongo.cursor import Cursor
17-
from pymongo.driver_info import DriverInfo
1816
from pymongo.errors import PyMongoError
1917

18+
from langchain_mongodb.utils import DRIVER_METADATA, _append_client_metadata
19+
2020
NUM_DOCUMENTS_TO_SAMPLE = 4
2121
MAX_STRING_LENGTH_OF_SAMPLE_DOCUMENT_VALUE = 20
2222

@@ -62,6 +62,8 @@ def __init__(
6262
self._sample_docs_in_coll_info = sample_docs_in_collection_info
6363
self._indexes_in_coll_info = indexes_in_collection_info
6464

65+
_append_client_metadata(self._client)
66+
6567
@classmethod
6668
def from_connection_string(
6769
cls,
@@ -72,7 +74,7 @@ def from_connection_string(
7274
"""Construct a MongoDBDatabase from URI."""
7375
client: MongoClient[dict[str, Any]] = MongoClient(
7476
connection_string,
75-
driver=DriverInfo(name="Langchain", version=version("langchain-mongodb")),
77+
driver=DRIVER_METADATA,
7678
)
7779
database = database or client.get_default_database().name
7880
return cls(client, database, **kwargs)

libs/langchain-mongodb/langchain_mongodb/cache.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
import json
44
import logging
55
import time
6-
from importlib.metadata import version
76
from typing import Any, Callable, Dict, Optional, Union
87

98
from langchain_core.caches import RETURN_VAL_TYPE, BaseCache
@@ -14,8 +13,8 @@
1413
from pymongo import MongoClient
1514
from pymongo.collection import Collection
1615
from pymongo.database import Database
17-
from pymongo.driver_info import DriverInfo
1816

17+
from langchain_mongodb.utils import DRIVER_METADATA
1918
from langchain_mongodb.vectorstores import MongoDBAtlasVectorSearch
2019

2120
logger = logging.getLogger(__file__)
@@ -211,7 +210,7 @@ def clear(self, **kwargs: Any) -> None:
211210
def _generate_mongo_client(connection_string: str) -> MongoClient:
212211
return MongoClient(
213212
connection_string,
214-
driver=DriverInfo(name="Langchain", version=version("langchain-mongodb")),
213+
driver=DRIVER_METADATA,
215214
)
216215

217216

libs/langchain-mongodb/langchain_mongodb/chat_message_histories.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
import json
22
import logging
3-
from importlib.metadata import version
43
from typing import Dict, List, Optional
54

65
from langchain_core.chat_history import BaseChatMessageHistory
@@ -10,7 +9,8 @@
109
messages_from_dict,
1110
)
1211
from pymongo import MongoClient, errors
13-
from pymongo.driver_info import DriverInfo
12+
13+
from langchain_mongodb.utils import DRIVER_METADATA, _append_client_metadata
1414

1515
logger = logging.getLogger(__name__)
1616

@@ -112,13 +112,12 @@ def __init__(
112112
if connection_string:
113113
raise ValueError("Must provide connection_string or client, not both")
114114
self.client = client
115+
_append_client_metadata(self.client)
115116
elif connection_string:
116117
try:
117118
self.client = MongoClient(
118119
connection_string,
119-
driver=DriverInfo(
120-
name="Langchain", version=version("langchain-mongodb")
121-
),
120+
driver=DRIVER_METADATA,
122121
)
123122
except errors.ConnectionFailure as error:
124123
logger.error(error)

libs/langchain-mongodb/langchain_mongodb/docstores.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,15 @@
11
from __future__ import annotations
22

3-
from importlib.metadata import version
43
from typing import Any, Generator, Iterable, Iterator, List, Optional, Sequence, Union
54

65
from langchain_core.documents import Document
76
from langchain_core.stores import BaseStore
87
from pymongo import MongoClient
98
from pymongo.collection import Collection
10-
from pymongo.driver_info import DriverInfo
119

1210
from langchain_mongodb.utils import (
11+
DRIVER_METADATA,
12+
_append_client_metadata,
1313
make_serializable,
1414
)
1515

@@ -37,6 +37,8 @@ def __init__(self, collection: Collection, text_key: str = "page_content") -> No
3737
self.collection = collection
3838
self._text_key = text_key
3939

40+
_append_client_metadata(self.collection.database.client)
41+
4042
@classmethod
4143
def from_connection_string(
4244
cls,
@@ -55,7 +57,7 @@ def from_connection_string(
5557
"""
5658
client: MongoClient = MongoClient(
5759
connection_string,
58-
driver=DriverInfo(name="Langchain", version=version("langchain-mongodb")),
60+
driver=DRIVER_METADATA,
5961
)
6062
db_name, collection_name = namespace.split(".")
6163
collection = client[db_name][collection_name]

libs/langchain-mongodb/langchain_mongodb/graphrag/graph.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
import json
44
import logging
55
from copy import deepcopy
6-
from importlib.metadata import version
76
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
87

98
from langchain_core.documents import Document
@@ -12,12 +11,12 @@
1211
from langchain_core.prompts.chat import ChatPromptTemplate
1312
from pymongo import MongoClient, UpdateOne
1413
from pymongo.collection import Collection
15-
from pymongo.driver_info import DriverInfo
1614
from pymongo.errors import OperationFailure
1715
from pymongo.results import BulkWriteResult
1816

1917
from langchain_mongodb.graphrag import example_templates, prompts
2018

19+
from ..utils import DRIVER_METADATA, _append_client_metadata
2120
from .prompts import rag_prompt
2221
from .schema import entity_schema
2322

@@ -140,9 +139,7 @@ def __init__(
140139
assert database_name is not None
141140
client: MongoClient = MongoClient(
142141
connection_string,
143-
driver=DriverInfo(
144-
name="Langchain", version=version("langchain-mongodb")
145-
),
142+
driver=DRIVER_METADATA,
146143
)
147144
db = client[database_name]
148145
if collection_name not in db.list_collection_names():
@@ -186,6 +183,9 @@ def __init__(
186183
)
187184
self.collection = collection
188185

186+
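# MongoClient.append_metadata was added in PyMongo 4.14.0; on earlier versions client.append_metadata resolves to a database named "append_metadata", so the call goes through a helper that checks for the method first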
187+
_append_client_metadata(collection.database.client)
188+
189189
self.entity_extraction_model = entity_extraction_model
190190
self.entity_prompt = (
191191
prompts.entity_prompt if entity_prompt is None else entity_prompt
@@ -268,7 +268,7 @@ def from_connection_string(
268268
"""
269269
client: MongoClient = MongoClient(
270270
connection_string,
271-
driver=DriverInfo(name="Langchain", version=version("langchain-mongodb")),
271+
driver=DRIVER_METADATA,
272272
)
273273
collection = client[database_name].create_collection(collection_name)
274274
return cls(

libs/langchain-mongodb/langchain_mongodb/indexes.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,16 +3,16 @@
33

44
import functools
55
import warnings
6-
from importlib.metadata import version
76
from typing import Any, Dict, List, Optional, Sequence
87

98
from langchain_core.indexing.base import RecordManager
109
from langchain_core.runnables.config import run_in_executor
1110
from pymongo import MongoClient
1211
from pymongo.collection import Collection
13-
from pymongo.driver_info import DriverInfo
1412
from pymongo.errors import OperationFailure
1513

14+
from langchain_mongodb.utils import DRIVER_METADATA, _append_client_metadata
15+
1616

1717
class MongoDBRecordManager(RecordManager):
1818
"""A MongoDB-based implementation of the record manager."""
@@ -36,6 +36,8 @@ def __init__(self, collection: Collection) -> None:
3636
super().__init__(namespace=namespace)
3737
self._collection = collection
3838

39+
_append_client_metadata(self._collection.database.client)
40+
3941
@classmethod
4042
def from_connection_string(
4143
cls, connection_string: str, namespace: str
@@ -51,7 +53,7 @@ def from_connection_string(
5153
"""
5254
client: MongoClient = MongoClient(
5355
connection_string,
54-
driver=DriverInfo(name="Langchain", version=version("langchain-mongodb")),
56+
driver=DRIVER_METADATA,
5557
)
5658
db_name, collection_name = namespace.split(".")
5759
collection = client[db_name][collection_name]

libs/langchain-mongodb/langchain_mongodb/loaders.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,15 @@
22
from __future__ import annotations
33

44
import logging
5-
from importlib.metadata import version
65
from typing import Any, Dict, List, Optional, Sequence
76

87
from langchain_community.document_loaders.base import BaseLoader
98
from langchain_core.documents import Document
109
from langchain_core.runnables.config import run_in_executor
1110
from pymongo import MongoClient
1211
from pymongo.collection import Collection
13-
from pymongo.driver_info import DriverInfo
12+
13+
from langchain_mongodb.utils import DRIVER_METADATA, _append_client_metadata
1414

1515
logger = logging.getLogger(__name__)
1616

@@ -53,6 +53,9 @@ def __init__(
5353
self.metadata_names = metadata_names or []
5454
self.include_db_collection_in_metadata = include_db_collection_in_metadata
5555

56+
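# MongoClient.append_metadata was added in PyMongo 4.14.0; on earlier versions client.append_metadata resolves to a database named "append_metadata", so the call goes through a helper that checks for the method first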
57+
_append_client_metadata(self.db.client)
58+
5659
@classmethod
5760
def from_connection_string(
5861
cls,
@@ -84,7 +87,7 @@ def from_connection_string(
8487
"""
8588
client: MongoClient[dict[str, Any]] = MongoClient(
8689
connection_string,
87-
driver=DriverInfo(name="Langchain", version=version("langchain-mongodb")),
90+
driver=DRIVER_METADATA,
8891
)
8992
collection = client[db_name][collection_name]
9093
return MongoDBLoader(

libs/langchain-mongodb/langchain_mongodb/retrievers/full_text_search.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from pymongo.collection import Collection
88

99
from langchain_mongodb.pipelines import text_search_stage
10-
from langchain_mongodb.utils import make_serializable
10+
from langchain_mongodb.utils import _append_client_metadata, make_serializable
1111

1212

1313
class MongoDBAtlasFullTextSearchRetriever(BaseRetriever):
@@ -28,6 +28,7 @@ class MongoDBAtlasFullTextSearchRetriever(BaseRetriever):
2828
top_k: Annotated[
2929
Optional[int], Field(deprecated='top_k is deprecated, use "k" instead')
3030
] = None
31+
_added_metadata: bool = False
3132
"""Number of documents to return. Default is no limit"""
3233

3334
def close(self) -> None:
@@ -55,6 +56,10 @@ def _get_relevant_documents(
5556
include_scores=self.include_scores,
5657
)
5758

59+
if not self._added_metadata:
60+
_append_client_metadata(self.collection.database.client)
61+
self._added_metadata = True
62+
5863
# Execution
5964
cursor = self.collection.aggregate(pipeline) # type: ignore[arg-type]
6065

libs/langchain-mongodb/langchain_mongodb/retrievers/parent_document.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
from __future__ import annotations
22

3-
from importlib.metadata import version
43
from typing import Any, List, Optional
54

65
import pymongo
@@ -14,12 +13,11 @@
1413
from langchain_core.runnables import run_in_executor
1514
from langchain_text_splitters import TextSplitter
1615
from pymongo import MongoClient
17-
from pymongo.driver_info import DriverInfo
1816

1917
from langchain_mongodb import MongoDBAtlasVectorSearch
2018
from langchain_mongodb.docstores import MongoDBDocStore
2119
from langchain_mongodb.pipelines import vector_search_stage
22-
from langchain_mongodb.utils import make_serializable
20+
from langchain_mongodb.utils import DRIVER_METADATA, make_serializable
2321

2422

2523
class MongoDBAtlasParentDocumentRetriever(ParentDocumentRetriever):
@@ -168,7 +166,7 @@ def from_connection_string(
168166
"""
169167
client: MongoClient = MongoClient(
170168
connection_string,
171-
driver=DriverInfo(name="Langchain", version=version("langchain-mongodb")),
169+
driver=DRIVER_METADATA,
172170
)
173171
collection = client[database_name][collection_name]
174172
vectorstore = MongoDBAtlasVectorSearch(

libs/langchain-mongodb/langchain_mongodb/utils.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,14 +20,25 @@
2020

2121
import logging
2222
from datetime import date, datetime
23+
from importlib.metadata import version
2324
from typing import Any, Dict, List, Union
2425

2526
import numpy as np
27+
from pymongo import MongoClient
28+
from pymongo.driver_info import DriverInfo
2629

2730
logger = logging.getLogger(__name__)
2831

2932
Matrix = Union[List[List[float]], List[np.ndarray], np.ndarray]
3033

34+
DRIVER_METADATA = DriverInfo(name="Langchain", version=version("langchain-mongodb"))
35+
36+
37+
def _append_client_metadata(client: MongoClient) -> None:
38+
# append_metadata was added in PyMongo 4.14.0, but is a valid database name on earlier versions
39+
if callable(client.append_metadata):
40+
client.append_metadata(DRIVER_METADATA)
41+
3142

3243
def cosine_similarity(X: Matrix, Y: Matrix) -> np.ndarray:
3344
"""Row-wise cosine similarity between two equal-width matrices."""

0 commit comments

Comments
 (0)