Add Vectorstore instrumentation automation (#1279)

lrafeei · mergify[bot] · TimPansino · web-flow · commit fb171c0cbfe7 · 2025-01-29T12:02:25.000-08:00
* Tweak instrumentation & add automation script

* Remove commented out code

* Move script and add description

* Remove breakpoint

---------

Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
Co-authored-by: Timothy Pansino <11214426+TimPansino@users.noreply.github.com>
diff --git a/newrelic/config.py b/newrelic/config.py
@@ -2162,13 +2162,10 @@ def _process_module_builtin_defaults():
         "newrelic.hooks.mlmodel_langchain",
         "instrument_langchain_callbacks_manager",
     )
+
+    # VectorStores with similarity_search method
     _process_module_definition(
-        "langchain_community.vectorstores.docarray.hnsw",
-        "newrelic.hooks.mlmodel_langchain",
-        "instrument_langchain_vectorstore_similarity_search",
-    )
-    _process_module_definition(
-        "langchain_community.vectorstores.docarray.in_memory",
+        "langchain_community.vectorstores.docarray",
         "newrelic.hooks.mlmodel_langchain",
         "instrument_langchain_vectorstore_similarity_search",
     )
@@ -2178,7 +2175,7 @@ def _process_module_builtin_defaults():
         "instrument_langchain_vectorstore_similarity_search",
     )
     _process_module_definition(
-        "langchain_community.vectorstores.redis.base",
+        "langchain_community.vectorstores.redis",
         "newrelic.hooks.mlmodel_langchain",
         "instrument_langchain_vectorstore_similarity_search",
     )
diff --git a/newrelic/hooks/mlmodel_langchain.py b/newrelic/hooks/mlmodel_langchain.py
@@ -59,8 +59,7 @@
     "langchain_community.vectorstores.documentdb": "DocumentDBVectorSearch",
     "langchain_community.vectorstores.duckdb": "DuckDB",
     "langchain_community.vectorstores.ecloud_vector_search": "EcloudESVectorStore",
-    "langchain_community.vectorstores.elastic_vector_search": "ElasticVectorSearch",
-    # "langchain_community.vectorstores.elastic_vector_search": "ElasticKnnSearch", # Deprecated
+    "langchain_community.vectorstores.elastic_vector_search": ["ElasticVectorSearch", "ElasticKnnSearch"],
     "langchain_community.vectorstores.elasticsearch": "ElasticsearchStore",
     "langchain_community.vectorstores.epsilla": "Epsilla",
     "langchain_community.vectorstores.faiss": "FAISS",
@@ -93,7 +92,7 @@
     "langchain_community.vectorstores.pgvector": "PGVector",
     "langchain_community.vectorstores.pinecone": "Pinecone",
     "langchain_community.vectorstores.qdrant": "Qdrant",
-    "langchain_community.vectorstores.redis.base": "Redis",
+    "langchain_community.vectorstores.redis": "Redis",
     "langchain_community.vectorstores.relyt": "Relyt",
     "langchain_community.vectorstores.rocksetdb": "Rockset",
     "langchain_community.vectorstores.scann": "ScaNN",
@@ -126,8 +125,7 @@
     "langchain_community.vectorstores.yellowbrick": "Yellowbrick",
     "langchain_community.vectorstores.zep_cloud": "ZepCloudVectorStore",
     "langchain_community.vectorstores.zep": "ZepVectorStore",
-    "langchain_community.vectorstores.docarray.hnsw": "DocArrayHnswSearch",
-    "langchain_community.vectorstores.docarray.in_memory": "DocArrayInMemorySearch",
+    "langchain_community.vectorstores.docarray": ["DocArrayHnswSearch", "DocArrayInMemorySearch"],
 }
 
 
diff --git a/tests/mlmodel_langchain/new_vectorstore_adder.py b/tests/mlmodel_langchain/new_vectorstore_adder.py
@@ -0,0 +1,100 @@
+"""
+    This script is used to automatically add new vectorstore classes to the newrelic-python-agent.
+    To run this script, start from the root of the newrelic-python-agent repository and run:
+        `python tests/mlmodel_langchain/new_vectorstore_adder.py`
+    This will generate the necessary code to instrument the new vectorstore classes in the local
+    copy of the newrelic-python-agent repository.
+"""
+
+import os
+
+from langchain_community import vectorstores
+
+from newrelic.hooks.mlmodel_langchain import VECTORSTORE_CLASSES
+
+# Resolve the repository root from this script's location:
+# tests/mlmodel_langchain/ -> tests/ -> repository root.
+dir_path = os.path.dirname(os.path.realpath(__file__))
+test_dir = os.path.dirname(dir_path)
+REPO_PATH = os.path.dirname(test_dir)
+
+
+def add_to_config(directory, instrumented_class=None):
+    """Register a vectorstore module in ``newrelic/config.py``.
+
+    Inserts a ``_process_module_definition(...)`` call for ``directory`` directly
+    after the first "VectorStores with similarity_search method" marker comment.
+
+    :param directory: Dotted module path of the vectorstore to instrument.
+    :param instrumented_class: Existing ``VECTORSTORE_CLASSES`` value for the module
+        (a class name or a list of class names), or ``None`` when the module is not
+        instrumented yet.  Nothing is written when this is truthy, because the module
+        already has a config entry.
+    """
+    import warnings
+
+    # Only implement this if there is not an instrumented class within the directory already.
+    if instrumented_class:
+        return
+
+    marker = "VectorStores with similarity_search method"
+    definition = (
+        "_process_module_definition(\n        "
+        f'"{directory}",\n        '
+        '"newrelic.hooks.mlmodel_langchain",\n        '
+        '"instrument_langchain_vectorstore_similarity_search",\n    '
+        ")\n"
+    )
+
+    with open(f"{REPO_PATH}/newrelic/config.py", "r+", encoding="utf-8") as file:
+        text = file.read()
+        if marker not in text:
+            # Without the anchor comment there is nowhere to insert the definition;
+            # leave the file untouched and tell the developer instead of silently doing nothing.
+            warnings.warn(f"Marker comment {marker!r} not found in newrelic/config.py; {directory} was not added.")
+            return
+
+        file.seek(0)
+        file.write(text.replace(marker, f"{marker}\n    {definition}", 1))
+        # Drop any leftover bytes in case the rewritten text is shorter than the original.
+        file.truncate()
+
+
+def add_to_hooks(class_name, directory, instrumented_class=None):
+    """Add ``class_name`` to ``VECTORSTORE_CLASSES`` in ``newrelic/hooks/mlmodel_langchain.py``.
+
+    :param class_name: Name of the vectorstore class to register.
+    :param directory: Dotted module path used as the ``VECTORSTORE_CLASSES`` key.
+    :param instrumented_class: Current mapping value for ``directory``: ``None`` (no entry
+        yet), a single class name, or a list of class names.  A list is appended to in
+        place, so the caller's object reflects the new class afterwards.
+    """
+    import re
+    import warnings
+
+    with open(f"{REPO_PATH}/newrelic/hooks/mlmodel_langchain.py", "r+", encoding="utf-8") as file:
+        text = file.read()
+
+        # The directory does not exist yet.  Add the new directory and class name to the beginning of the dictionary
+        if not instrumented_class:
+            updated = text.replace(
+                "VECTORSTORE_CLASSES = {", "VECTORSTORE_CLASSES = {\n    " + f'"{directory}": "{class_name}",', 1
+            )
+
+        # The directory exists.  Rewrite its value as a list that includes the new class.
+        else:
+            if isinstance(instrumented_class, list):
+                instrumented_class.append(class_name)
+                class_names = instrumented_class
+            else:
+                class_names = [instrumented_class, class_name]
+            new_value = "[" + ", ".join(f'"{name}"' for name in class_names) + "]"
+
+            # Anchor on the dictionary key so that an identical class name elsewhere in the
+            # file is never touched, and accept either a quoted string or a (possibly
+            # multi-line) list as the existing value.
+            entry = re.compile(re.escape(f'"{directory}"') + r'\s*:\s*(?:\[[^\]]*\]|"[^"]*")')
+            # A callable replacement keeps backslashes in the new text from being interpreted.
+            updated = entry.sub(lambda _match: f'"{directory}": {new_value}', text, count=1)
+
+        if updated == text:
+            warnings.warn(f"No VECTORSTORE_CLASSES entry could be updated for {directory}; {class_name} was not added.")
+            return
+
+        file.seek(0)
+        file.write(updated)
+        # Drop any leftover bytes in case the rewritten text is shorter than the original.
+        file.truncate()
+
+
+def main():
+    """Find uninstrumented vectorstore classes and add them to the agent's source files.
+
+    Walks ``langchain_community.vectorstores._module_lookup`` (class name -> module path)
+    and, for every class that has a ``similarity_search`` attribute and is not yet listed
+    in ``VECTORSTORE_CLASSES``, updates ``newrelic/config.py`` and
+    ``newrelic/hooks/mlmodel_langchain.py``.  The imported ``VECTORSTORE_CLASSES`` mapping
+    is updated in memory as well so later iterations see what was already written.
+    """
+    # Same name and contents as the ignore list in tests/mlmodel_langchain/test_vectorstore.py.
+    _test_vectorstore_modules_instrumented_ignored_classes = {
+        "VectorStore",  # Base class
+        "Zilliz",  # Inherited from Milvus, which we are already instrumenting.
+    }
+
+    for class_name, directory in vectorstores._module_lookup.items():
+        if class_name in _test_vectorstore_modules_instrumented_ignored_classes:
+            continue
+
+        class_ = getattr(vectorstores, class_name)
+        if not hasattr(class_, "similarity_search"):
+            continue
+
+        instrumented_class = VECTORSTORE_CLASSES.get(directory)
+
+        # Compare against whole class names; ``in`` on a plain string would be a substring test.
+        if isinstance(instrumented_class, str):
+            known_classes = [instrumented_class]
+        else:
+            known_classes = instrumented_class or []
+        if class_name in known_classes:
+            continue
+
+        # Add in newrelic/config.py if there is not an instrumented directory
+        # Otherwise, config already exists, so no need to duplicate it.
+        add_to_config(directory, instrumented_class)
+
+        # Add in newrelic/hooks/mlmodel_langchain.py
+        add_to_hooks(class_name, directory, instrumented_class)
+
+        # Mirror the file change in memory so a second new class from the same module
+        # extends the entry instead of creating a duplicate config block and dict key.
+        if not instrumented_class:
+            VECTORSTORE_CLASSES[directory] = class_name
+        elif isinstance(instrumented_class, str):
+            VECTORSTORE_CLASSES[directory] = [instrumented_class, class_name]
+        elif isinstance(instrumented_class, list) and class_name not in instrumented_class:
+            instrumented_class.append(class_name)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tests/mlmodel_langchain/test_vectorstore.py b/tests/mlmodel_langchain/test_vectorstore.py
@@ -92,7 +92,7 @@ def vectorstore_events_sans_content(event):
 _test_vectorstore_modules_instrumented_ignored_classes = set(
     [
         "VectorStore",  # Base class
-        "ElasticKnnSearch",  # Deprecated, so we will not be instrumenting this.
+        "Zilliz",  # Inherited from Milvus, which we are already instrumenting.
     ]
 )
 

Original file line number	Diff line number	Diff line change
`@@ -92,7 +92,7 @@ def vectorstore_events_sans_content(event):`
`92`	`92`	`_test_vectorstore_modules_instrumented_ignored_classes = set(`
`93`	`93`	`[`
`94`	`94`	`"VectorStore", # Base class`
`95`		`- "ElasticKnnSearch", # Deprecated, so we will not be instrumenting this.`
	`95`	`+ "Zilliz", # Inherited from Milvus, which we are already instrumenting.`
`96`	`96`	`]`
`97`	`97`	`)`
`98`	`98`