Skip to content

Commit fb171c0

Browse files
lrafeei, mergify[bot], TimPansino
authored
Add Vectorstore instrumentation automation (#1279)
* Tweak instrumentation & add automation script * Remove commented out code * Move script and add description * Remove breakpoint --------- Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com> Co-authored-by: Timothy Pansino <[email protected]>
1 parent 82918c5 commit fb171c0

File tree

4 files changed

+108
-13
lines changed

4 files changed

+108
-13
lines changed

newrelic/config.py

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2162,13 +2162,10 @@ def _process_module_builtin_defaults():
21622162
"newrelic.hooks.mlmodel_langchain",
21632163
"instrument_langchain_callbacks_manager",
21642164
)
2165+
2166+
# VectorStores with similarity_search method
21652167
_process_module_definition(
2166-
"langchain_community.vectorstores.docarray.hnsw",
2167-
"newrelic.hooks.mlmodel_langchain",
2168-
"instrument_langchain_vectorstore_similarity_search",
2169-
)
2170-
_process_module_definition(
2171-
"langchain_community.vectorstores.docarray.in_memory",
2168+
"langchain_community.vectorstores.docarray",
21722169
"newrelic.hooks.mlmodel_langchain",
21732170
"instrument_langchain_vectorstore_similarity_search",
21742171
)
@@ -2178,7 +2175,7 @@ def _process_module_builtin_defaults():
21782175
"instrument_langchain_vectorstore_similarity_search",
21792176
)
21802177
_process_module_definition(
2181-
"langchain_community.vectorstores.redis.base",
2178+
"langchain_community.vectorstores.redis",
21822179
"newrelic.hooks.mlmodel_langchain",
21832180
"instrument_langchain_vectorstore_similarity_search",
21842181
)

newrelic/hooks/mlmodel_langchain.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -59,8 +59,7 @@
5959
"langchain_community.vectorstores.documentdb": "DocumentDBVectorSearch",
6060
"langchain_community.vectorstores.duckdb": "DuckDB",
6161
"langchain_community.vectorstores.ecloud_vector_search": "EcloudESVectorStore",
62-
"langchain_community.vectorstores.elastic_vector_search": "ElasticVectorSearch",
63-
# "langchain_community.vectorstores.elastic_vector_search": "ElasticKnnSearch", # Deprecated
62+
"langchain_community.vectorstores.elastic_vector_search": ["ElasticVectorSearch", "ElasticKnnSearch"],
6463
"langchain_community.vectorstores.elasticsearch": "ElasticsearchStore",
6564
"langchain_community.vectorstores.epsilla": "Epsilla",
6665
"langchain_community.vectorstores.faiss": "FAISS",
@@ -93,7 +92,7 @@
9392
"langchain_community.vectorstores.pgvector": "PGVector",
9493
"langchain_community.vectorstores.pinecone": "Pinecone",
9594
"langchain_community.vectorstores.qdrant": "Qdrant",
96-
"langchain_community.vectorstores.redis.base": "Redis",
95+
"langchain_community.vectorstores.redis": "Redis",
9796
"langchain_community.vectorstores.relyt": "Relyt",
9897
"langchain_community.vectorstores.rocksetdb": "Rockset",
9998
"langchain_community.vectorstores.scann": "ScaNN",
@@ -126,8 +125,7 @@
126125
"langchain_community.vectorstores.yellowbrick": "Yellowbrick",
127126
"langchain_community.vectorstores.zep_cloud": "ZepCloudVectorStore",
128127
"langchain_community.vectorstores.zep": "ZepVectorStore",
129-
"langchain_community.vectorstores.docarray.hnsw": "DocArrayHnswSearch",
130-
"langchain_community.vectorstores.docarray.in_memory": "DocArrayInMemorySearch",
128+
"langchain_community.vectorstores.docarray": ["DocArrayHnswSearch", "DocArrayInMemorySearch"],
131129
}
132130

133131

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
"""
2+
This script is used to automatically add new vectorstore classes to the newrelic-python-agent.
3+
To run this script, start from the root of the newrelic-python-agent repository and run:
4+
`python tests/mlmodel_langchain/new_vectorstore_adder.py`
5+
This will generate the necessary code to instrument the new vectorstore classes in the local
6+
copy of the newrelic-python-agent repository.
7+
"""
8+
9+
import os
10+
11+
from langchain_community import vectorstores
12+
13+
from newrelic.hooks.mlmodel_langchain import VECTORSTORE_CLASSES
14+
15+
dir_path = os.path.dirname(os.path.realpath(__file__))
16+
test_dir = os.path.abspath(os.path.join(dir_path, os.pardir))
17+
REPO_PATH = os.path.abspath(os.path.join(test_dir, os.pardir))
18+
19+
20+
def add_to_config(directory, instrumented_class=None, config_path=None):
    """Register a vectorstore module in ``newrelic/config.py``.

    A ``_process_module_definition(...)`` call for *directory* is inserted
    directly below the ``VectorStores with similarity_search method`` marker
    comment, indented to sit inside the enclosing function body.

    Args:
        directory: Dotted module path that should be hooked.
        instrumented_class: Class name(s) already instrumented for this
            module. When truthy, the module is assumed to be registered
            already and nothing is written.
        config_path: Path of the config file to edit. Defaults to
            ``newrelic/config.py`` under ``REPO_PATH``.

    Raises:
        ValueError: If the marker comment cannot be found, since the new
            definition would otherwise be dropped silently.
    """
    # Only implement this if there is not an instrumented class within the directory already.
    if instrumented_class:
        return

    if config_path is None:
        config_path = f"{REPO_PATH}/newrelic/config.py"

    marker = "VectorStores with similarity_search method"
    # The snippet lands inside a function body, so the call is indented one
    # level (4 spaces) and its arguments two levels (8 spaces).
    definition = (
        "    _process_module_definition(\n"
        f'        "{directory}",\n'
        '        "newrelic.hooks.mlmodel_langchain",\n'
        '        "instrument_langchain_vectorstore_similarity_search",\n'
        "    )\n"
    )

    with open(config_path, "r+", encoding="utf-8") as file:
        text = file.read()

        if marker not in text:
            raise ValueError(f"Marker comment {marker!r} not found in {config_path}")

        # Already registered (e.g. a second new class from the same module
        # during one run): do not emit a duplicate definition.
        if definition in text:
            return

        text = text.replace(marker, f"{marker}\n{definition}", 1)

        file.seek(0)
        file.write(text)
        # Drop any stale tail in case the rewritten text is ever shorter.
        file.truncate()
39+
40+
41+
def add_to_hooks(class_name, directory, instrumented_class=None, hooks_path=None):
    """Record *class_name* under *directory* in the ``VECTORSTORE_CLASSES`` dict.

    The entry is located in the hooks file itself, anchored on the quoted
    module path, so only the value belonging to *directory* is touched:

    * no entry yet      -> ``"<directory>": "<class_name>",`` is added at the
      top of the dictionary;
    * a single class    -> the value becomes a two-element list;
    * a list of classes -> *class_name* is appended to that list.

    If *class_name* is already listed for *directory* the file is left
    untouched.

    Args:
        class_name: Name of the vectorstore class to record.
        directory: Dotted module path the class is looked up under.
        instrumented_class: Previously known value for *directory*. Accepted
            for backward compatibility; the file contents are authoritative
            and this argument is never mutated.
        hooks_path: Path of the hooks file to edit. Defaults to
            ``newrelic/hooks/mlmodel_langchain.py`` under ``REPO_PATH``.

    Raises:
        ValueError: If a new entry is required but the
            ``VECTORSTORE_CLASSES = {`` header cannot be found.
    """
    import re

    if hooks_path is None:
        hooks_path = f"{REPO_PATH}/newrelic/hooks/mlmodel_langchain.py"

    dict_header = "VECTORSTORE_CLASSES = {"
    # Matches `"<directory>": "Name"` or `"<directory>": ["A", "B", ...]`;
    # the list form may span several lines.
    entry_pattern = re.compile('"' + re.escape(directory) + r'":\s*(?P<value>\[[^\]]*\]|"[^"]*")')

    with open(hooks_path, "r+", encoding="utf-8") as file:
        text = file.read()
        entry = entry_pattern.search(text)

        if entry is None:
            # The directory does not exist yet. Add the new directory and
            # class name to the beginning of the dictionary.
            if dict_header not in text:
                raise ValueError(f"{dict_header!r} not found in {hooks_path}")
            text = text.replace(dict_header, f'{dict_header}\n    "{directory}": "{class_name}",', 1)
        else:
            names = re.findall(r'"([^"]+)"', entry.group("value"))
            if class_name in names:
                return

            # Single class or existing list: either way the result is a list.
            names.append(class_name)
            new_value = "[" + ", ".join(f'"{name}"' for name in names) + "]"
            text = text[: entry.start("value")] + new_value + text[entry.end("value") :]

        file.seek(0)
        file.write(text)
        # Drop any stale tail in case the rewritten text is ever shorter.
        file.truncate()
66+
67+
68+
def main():
    """Add every uninstrumented langchain_community vectorstore class to the agent.

    Walks ``vectorstores._module_lookup`` (class name -> module path), skips
    ignored classes and classes without a ``similarity_search`` attribute, and
    for each class not yet listed in ``VECTORSTORE_CLASSES`` calls
    ``add_to_config`` and ``add_to_hooks``.

    A local snapshot of ``VECTORSTORE_CLASSES`` is updated after every
    addition, so a second new class from the same module is treated as an
    addition to an existing entry rather than as a brand-new module.
    """
    ignored_classes = {
        "VectorStore",  # Base class
        "Zilliz",  # Inherited from Milvus, which we are already instrumenting.
    }

    # module path -> class name (str) or list of class names. Lists are
    # copied so the imported VECTORSTORE_CLASSES is never mutated.
    entries = {
        module_path: names if isinstance(names, str) else list(names)
        for module_path, names in VECTORSTORE_CLASSES.items()
    }

    for class_name, directory in vectorstores._module_lookup.items():
        if class_name in ignored_classes:
            continue

        class_ = getattr(vectorstores, class_name)
        if not hasattr(class_, "similarity_search"):
            continue

        current = entries.get(directory)
        # Normalise to a list so membership is an exact-name test; `in` on a
        # bare string would be a substring match.
        registered = [current] if isinstance(current, str) else list(current or ())
        if class_name in registered:
            continue

        # Hand the helpers their own copy so in-place edits cannot leak into
        # the bookkeeping kept here.
        instrumented_class = list(current) if isinstance(current, list) else current

        # Add in newrelic/config.py if there is not an instrumented directory
        # Otherwise, config already exists, so no need to duplicate it.
        add_to_config(directory, instrumented_class)

        # Add in newrelic/hooks/mlmodel_langchain.py
        add_to_hooks(class_name, directory, instrumented_class)

        # Mirror what was just written: first class is stored as a string,
        # any further class turns the entry into a list.
        entries[directory] = registered + [class_name] if registered else class_name


if __name__ == "__main__":
    main()

tests/mlmodel_langchain/test_vectorstore.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ def vectorstore_events_sans_content(event):
9292
_test_vectorstore_modules_instrumented_ignored_classes = set(
9393
[
9494
"VectorStore", # Base class
95-
"ElasticKnnSearch", # Deprecated, so we will not be instrumenting this.
95+
"Zilliz", # Inherited from Milvus, which we are already instrumenting.
9696
]
9797
)
9898

0 commit comments

Comments
 (0)