Fix/drift n depth (#1676)

AlonsoGuevara · web-flow · commit 0805924a35d6 · 2025-02-05T17:22:34.000-06:00
* Fix n_depth param

* Semver

* Change smoke tests params for drift

* Reduce log printing for expected exceptions
diff --git a/.semversioner/next-release/patch-20250204235603673724.json b/.semversioner/next-release/patch-20250204235603673724.json
@@ -0,0 +1,4 @@
+{
+  "type": "patch",
+  "description": "Fix proper use of n_depth for drift search"
+}
diff --git a/graphrag/config/defaults.py b/graphrag/config/defaults.py
@@ -187,7 +187,7 @@
 # DRIFT Search
 DRIFT_SEARCH_LLM_TEMPERATURE = 0
 DRIFT_SEARCH_LLM_TOP_P = 1
-DRIFT_SEARCH_LLM_N = 3
+DRIFT_SEARCH_LLM_N = 1
 DRIFT_SEARCH_MAX_TOKENS = 12_000
 DRIFT_SEARCH_DATA_MAX_TOKENS = 12_000
 DRIFT_SEARCH_CONCURRENCY = 32
diff --git a/graphrag/query/llm/text_utils.py b/graphrag/query/llm/text_utils.py
@@ -50,7 +50,7 @@ def chunk_text(
     yield from (token_encoder.decode(list(chunk)) for chunk in chunk_iterator)
 
 
-def try_parse_json_object(input: str) -> tuple[str, dict]:
+def try_parse_json_object(input: str, verbose: bool = True) -> tuple[str, dict]:
     """JSON cleaning and formatting utilities."""
     # Sometimes, the LLM returns a json string with some extra description, this function will clean it up.
 
@@ -59,7 +59,8 @@ def try_parse_json_object(input: str) -> tuple[str, dict]:
         # Try parse first
         result = json.loads(input)
     except json.JSONDecodeError:
-        log.info("Warning: Error decoding faulty json, attempting repair")
+        if verbose:
+            log.info("Warning: Error decoding faulty json, attempting repair")
 
     if result:
         return input, result
@@ -97,11 +98,13 @@ def try_parse_json_object(input: str) -> tuple[str, dict]:
         try:
             result = json.loads(input)
         except json.JSONDecodeError:
-            log.exception("error loading json, json=%s", input)
+            if verbose:
+                log.exception("error loading json, json=%s", input)
             return input, {}
         else:
             if not isinstance(result, dict):
-                log.exception("not expected dict type. type=%s:", type(result))
+                if verbose:
+                    log.exception("not expected dict type. type=%s:", type(result))
                 return input, {}
             return input, result
     else:
diff --git a/graphrag/query/structured_search/drift_search/action.py b/graphrag/query/structured_search/drift_search/action.py
@@ -7,6 +7,8 @@
 import logging
 from typing import Any
 
+from graphrag.query.llm.text_utils import try_parse_json_object
+
 log = logging.getLogger(__name__)
 
 
@@ -72,17 +74,12 @@ async def asearch(self, search_engine: Any, global_query: str, scorer: Any = Non
             drift_query=global_query, query=self.query
         )
 
-        try:
-            response = json.loads(search_result.response)
-        except json.JSONDecodeError:
-            error_message = "Failed to parse search response"
-            log.exception("%s: %s", error_message, search_result.response)
-            # Do not launch exception as it will roll up with other steps
-            # Instead return an empty response and let score -inf handle it
-            response = {}
+        # Do not raise an exception here, as it would roll up with the other steps.
+        # Instead, fall back to an empty response and let the -inf score handle it.
+        _, response = try_parse_json_object(search_result.response, verbose=False)
 
         self.answer = response.pop("response", None)
-        self.score = response.pop("score", float("-inf"))
+        self.score = float(response.pop("score", "-inf"))
         self.metadata.update({"context_data": search_result.context_data})
 
         if self.answer is None:
diff --git a/graphrag/query/structured_search/drift_search/search.py b/graphrag/query/structured_search/drift_search/search.py
@@ -219,7 +219,7 @@ async def asearch(
         # Main loop
         epochs = 0
         llm_call_offset = 0
-        while epochs < self.context_builder.config.n:
+        while epochs < self.context_builder.config.n_depth:
             actions = self.query_state.rank_incomplete_actions()
             if len(actions) == 0:
                 log.info("No more actions to take. Exiting DRIFT loop.")
diff --git a/tests/fixtures/min-csv/settings.yml b/tests/fixtures/min-csv/settings.yml
@@ -50,4 +50,9 @@ reporting:
   base_dir: "logs"
 
 snapshots:
-  embeddings: True
+  embeddings: True
+
+drift_search:
+  n_depth: 1
+  k_follow_ups: 3
+  primer_folds: 3
diff --git a/tests/fixtures/text/settings.yml b/tests/fixtures/text/settings.yml
@@ -55,4 +55,9 @@ reporting:
   base_dir: "logs"
 
 snapshots:
-  embeddings: True
+  embeddings: True
+
+drift_search:
+  n_depth: 1
+  k_follow_ups: 3
+  primer_folds: 3

-Original file line number
+Diff line change
@@ @@ -0,0 +1,4 @@ @@
 +{
 +  "type": "patch",
 +  "description": "Fix proper use of n_depth for drift search"
 +}