3 files changed: +35 −10 lines

```diff
@@ -51,5 +51,9 @@ class OrderRequiredError(ValueError):
5151 """Operation requires total row ordering to be enabled."""
5252
5353
54+ class QueryComplexityError (RuntimeError ):
55+ """Query plan is too complex to execute."""
56+
57+
5458class TimeTravelDisabledWarning (Warning ):
5559 """A query was reattempted without time travel."""
```diff
@@ -1833,14 +1833,22 @@ def _start_query(
         Starts BigQuery query job and waits for results.
         """
         job_config = self._prepare_query_job_config(job_config)
-        return bigframes.session._io.bigquery.start_query_with_client(
-            self,
-            sql,
-            job_config,
-            max_results,
-            timeout,
-            api_name=api_name,
-        )
+        try:
+            return bigframes.session._io.bigquery.start_query_with_client(
+                self,
+                sql,
+                job_config,
+                max_results,
+                timeout,
+                api_name=api_name,
+            )
+        except google.api_core.exceptions.BadRequest as e:
+            # Unfortunately, this error type does not have a separate error code or exception type
+            if "Resources exceeded during query execution" in e.message:
+                new_message = "Computation is too complex to execute as a single query. Try using DataFrame.cache() on intermediate results, or setting bigframes.options.compute.enable_multi_query_execution."
+                raise bigframes.exceptions.QueryComplexityError(new_message) from e
+            else:
+                raise
 
     def _start_query_ml_ddl(
         self,
```
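The replacement message names two mitigations. A hedged sketch of what each looks like from user code (`df` is an illustrative DataFrame; the only API assumed beyond it is `DataFrame.cache()` and the `bigframes.options.compute.enable_multi_query_execution` option, both named in the new message):

```python
import bigframes

# Mitigation 1: cache an intermediate result so later queries read from a
# materialized table instead of re-expanding the whole query tree.
intermediate = df.merge(df, on="int64_col")  # df: hypothetical DataFrame
intermediate.cache()

# Mitigation 2: allow oversized plans to be decomposed into multiple
# queries automatically, per the new error message.
bigframes.options.compute.enable_multi_query_execution = True
```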
```diff
@@ -4472,13 +4472,26 @@ def test_recursion_limit(scalars_df_index):
     scalars_df_index.to_pandas()
 
 
+def test_query_complexity_error(scalars_df_index):
+    # This test requires automatic caching/query decomposition to be turned off
+    bf_df = scalars_df_index
+    for _ in range(8):
+        bf_df = bf_df.merge(bf_df, on="int64_col").head(30)
+        bf_df = bf_df[bf_df.columns[:20]]
+
+    with pytest.raises(
+        bigframes.exceptions.QueryComplexityError, match=r"Try using DataFrame\.cache"
+    ):
+        bf_df.to_pandas()
+
+
 def test_query_complexity_repeated_joins(
     scalars_df_index, scalars_pandas_df_index, with_multiquery_execution
 ):
     pd_df = scalars_pandas_df_index
     bf_df = scalars_df_index
-    for _ in range(6):
-        # recursively join, resulting in 2^6 - 1 = 63 joins
+    for _ in range(8):
+        # recursively join, resulting in 2^8 - 1 = 255 joins
         pd_df = pd_df.merge(pd_df, on="int64_col").head(30)
         pd_df = pd_df[pd_df.columns[:20]]
         bf_df = bf_df.merge(bf_df, on="int64_col").head(30)
```
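For the comment's arithmetic: each self-merge duplicates the existing join tree and adds one new join, so after n rounds there are j(n) = 2*j(n-1) + 1 = 2^n - 1 joins. A quick standalone check of the old and new test depths:

```python
# j(0) = 0; each round doubles the join tree and adds one new join.
def joins_after(rounds: int) -> int:
    joins = 0
    for _ in range(rounds):
        joins = 2 * joins + 1  # closed form: 2**rounds - 1
    return joins

assert joins_after(6) == 63   # the test's previous depth
assert joins_after(8) == 255  # the test's new depth
```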