feat: Extend BigQuery detect_anomalies tool to support future data anomaly detection

google-genai-bot · copybara-github · commit 38ea749c9cec · 2025-11-03T23:02:02.000-08:00
ARIMA supports anomaly detection on both historical data and future data. This CL adds support in the tool for anomaly detection on a future (target) data table.

PiperOrigin-RevId: 827803748
diff --git a/src/google/adk/tools/bigquery/query_tool.py b/src/google/adk/tools/bigquery/query_tool.py
@@ -1100,6 +1100,7 @@ def detect_anomalies(
     times_series_timestamp_col: str,
     times_series_data_col: str,
     horizon: Optional[int] = 10,
+    target_data: Optional[str] = None,
     times_series_id_cols: Optional[list[str]] = None,
     anomaly_prob_threshold: Optional[float] = 0.95,
     *,
@@ -1121,6 +1122,9 @@ def detect_anomalies(
         numerical values to be forecasted and anomaly detected.
       horizon (int, optional): The number of time steps to forecast into the
         future. Defaults to 10.
+      target_data (str, optional): The table id of the BigQuery table containing
+        the target time series data, or a query statement that selects the
+        target data. Defaults to None.
       times_series_id_cols (list, optional): The column names of the id columns
         to indicate each time series when there are multiple time series in the
         table. All elements must be strings. Defaults to None.
@@ -1264,6 +1268,18 @@ def detect_anomalies(
   anomaly_detection_query = f"""
   SELECT * FROM ML.DETECT_ANOMALIES(MODEL {model_name}, STRUCT({anomaly_prob_threshold} AS anomaly_prob_threshold))
   """
+  if target_data:
+    trimmed_upper_target_data = target_data.strip().upper()
+    if trimmed_upper_target_data.startswith(
+        "SELECT"
+    ) or trimmed_upper_target_data.startswith("WITH"):
+      target_data_source = f"({target_data})"
+    else:
+      target_data_source = f"SELECT * FROM `{target_data}`"
+
+    anomaly_detection_query = f"""
+    SELECT * FROM ML.DETECT_ANOMALIES(MODEL {model_name}, STRUCT({anomaly_prob_threshold} AS anomaly_prob_threshold), {target_data_source})
+    """
 
   # Create a session and run the create model query.
   original_write_mode = settings.write_mode
diff --git a/tests/unittests/tools/bigquery/test_bigquery_query_tool.py b/tests/unittests/tools/bigquery/test_bigquery_query_tool.py
@@ -1509,6 +1509,62 @@ def test_detect_anomalies_with_custom_params(mock_uuid, mock_execute_sql):
   )
 
 
+# detect_anomalies calls execute_sql twice. We need to test that
+# the queries are properly constructed and call execute_sql with the correct
+# parameters exactly twice.
+@mock.patch("google.adk.tools.bigquery.query_tool.execute_sql", autospec=True)
+@mock.patch("uuid.uuid4", autospec=True)
+def test_detect_anomalies_on_target_table(mock_uuid, mock_execute_sql):
+  """Test time series anomaly detection tool when target data is provided."""
+  mock_credentials = mock.MagicMock(spec=Credentials)
+  mock_settings = BigQueryToolConfig(write_mode=WriteMode.PROTECTED)
+  mock_tool_context = mock.create_autospec(ToolContext, instance=True)
+  mock_uuid.return_value = "test_uuid"
+  mock_execute_sql.return_value = {"status": "SUCCESS"}
+
+  history_data_query = "SELECT * FROM `test-dataset.history-table`"
+  target_data_query = "SELECT * FROM `test-dataset.target-table`"
+  detect_anomalies(
+      project_id="test-project",
+      history_data=history_data_query,
+      times_series_timestamp_col="ts_timestamp",
+      times_series_data_col="ts_data",
+      times_series_id_cols=["dim1", "dim2"],
+      horizon=20,
+      target_data=target_data_query,
+      anomaly_prob_threshold=0.8,
+      credentials=mock_credentials,
+      settings=mock_settings,
+      tool_context=mock_tool_context,
+  )
+
+  expected_create_model_query = """
+  CREATE TEMP MODEL detect_anomalies_model_test_uuid
+    OPTIONS (MODEL_TYPE = 'ARIMA_PLUS', TIME_SERIES_TIMESTAMP_COL = 'ts_timestamp', TIME_SERIES_DATA_COL = 'ts_data', HORIZON = 20, TIME_SERIES_ID_COL = ['dim1', 'dim2'])
+  AS (SELECT * FROM `test-dataset.history-table`)
+  """
+
+  expected_anomaly_detection_query = """
+    SELECT * FROM ML.DETECT_ANOMALIES(MODEL detect_anomalies_model_test_uuid, STRUCT(0.8 AS anomaly_prob_threshold), (SELECT * FROM `test-dataset.target-table`))
+    """
+
+  assert mock_execute_sql.call_count == 2
+  mock_execute_sql.assert_any_call(
+      "test-project",
+      expected_create_model_query,
+      mock_credentials,
+      mock_settings,
+      mock_tool_context,
+  )
+  mock_execute_sql.assert_any_call(
+      "test-project",
+      expected_anomaly_detection_query,
+      mock_credentials,
+      mock_settings,
+      mock_tool_context,
+  )
+
+
 def test_detect_anomalies__with_invalid_id_cols():
   """Test time series anomaly detection tool invocation with invalid times_series_id_cols."""
   mock_credentials = mock.MagicMock(spec=Credentials)