test: add tests for gemini structured output array and struct types (#1670)

GarrettWu · web-flow · commit 340b93daf3bf · 2025-05-05T17:16:44.000-05:00
* test: add tests for gemini structured output array and struct types

* fix test in Python 3.9
diff --git a/bigframes/ml/llm.py b/bigframes/ml/llm.py
@@ -617,7 +617,7 @@ def predict(
                 It creates a struct column of the items of the iterable, and use the concatenated result as the input prompt. No-op if set to None.
             output_schema (Mapping[str, str] or None, default None):
                 The schema used to generate structured output as a bigframes DataFrame. The schema is a string key-value pair of <column_name>:<type>.
-                Supported types are int64, float64, bool and string. If None, output text result.
+                Supported types are int64, float64, bool, string, array<type> (e.g. array<int64>) and struct<column type> (e.g. struct<number int64>). If None, output the text result.
         Returns:
             bigframes.dataframe.DataFrame: DataFrame of shape (n_samples, n_input_columns + n_prediction_columns). Returns predicted values.
         """
diff --git a/tests/system/small/ml/conftest.py b/tests/system/small/ml/conftest.py
@@ -29,7 +29,6 @@
     globals,
     imported,
     linear_model,
-    llm,
     remote,
 )
 
@@ -339,20 +338,3 @@ def imported_xgboost_model(
         output={"predicted_label": "float64"},
         model_path=imported_xgboost_array_model_path,
     )
-
-
-@pytest.fixture(scope="session")
-def bqml_gemini_text_generator(bq_connection, session) -> llm.GeminiTextGenerator:
-    return llm.GeminiTextGenerator(
-        model_name="gemini-1.5-flash-002",
-        connection_name=bq_connection,
-        session=session,
-    )
-
-
-@pytest.fixture(scope="session")
-def bqml_claude3_text_generator(bq_connection, session) -> llm.Claude3TextGenerator:
-    return llm.Claude3TextGenerator(
-        connection_name=bq_connection,
-        session=session,
-    )
diff --git a/tests/system/small/ml/test_llm.py b/tests/system/small/ml/test_llm.py
@@ -16,6 +16,7 @@
 from unittest import mock
 
 import pandas as pd
+import pyarrow as pa
 import pytest
 
 import bigframes
@@ -253,22 +254,27 @@ def test_gemini_text_generator_predict_output_schema_success(
         "int_output": "int64",
         "float_output": "float64",
         "str_output": "string",
+        "array_output": "array<int64>",
+        "struct_output": "struct<number int64>",
     }
-    df = gemini_text_generator_model.predict(
-        llm_text_df, output_schema=output_schema
-    ).to_pandas()
+    df = gemini_text_generator_model.predict(llm_text_df, output_schema=output_schema)
+    assert df["bool_output"].dtype == pd.BooleanDtype()
+    assert df["int_output"].dtype == pd.Int64Dtype()
+    assert df["float_output"].dtype == pd.Float64Dtype()
+    assert df["str_output"].dtype == pd.StringDtype(storage="pyarrow")
+    assert df["array_output"].dtype == pd.ArrowDtype(pa.list_(pa.int64()))
+    assert df["struct_output"].dtype == pd.ArrowDtype(
+        pa.struct([("number", pa.int64())])
+    )
+
+    pd_df = df.to_pandas()
     utils.check_pandas_df_schema_and_index(
-        df,
+        pd_df,
         columns=list(output_schema.keys()) + ["prompt", "full_response", "status"],
         index=3,
         col_exact=False,
     )
 
-    assert df["bool_output"].dtype == pd.BooleanDtype()
-    assert df["int_output"].dtype == pd.Int64Dtype()
-    assert df["float_output"].dtype == pd.Float64Dtype()
-    assert df["str_output"].dtype == pd.StringDtype(storage="pyarrow")
-
 
 # Overrides __eq__ function for comparing as mock.call parameter
 class EqCmpAllDataFrame(bpd.DataFrame):
@@ -305,8 +311,7 @@ def test_text_generator_retry_success(
     session,
     model_class,
     options,
-    bqml_gemini_text_generator: llm.GeminiTextGenerator,
-    bqml_claude3_text_generator: llm.Claude3TextGenerator,
+    bq_connection,
 ):
     # Requests.
     df0 = EqCmpAllDataFrame(
@@ -387,11 +392,7 @@ def test_text_generator_retry_success(
         ),
     ]
 
-    text_generator_model = (
-        bqml_gemini_text_generator
-        if (model_class == llm.GeminiTextGenerator)
-        else bqml_claude3_text_generator
-    )
+    text_generator_model = model_class(connection_name=bq_connection, session=session)
     text_generator_model._bqml_model = mock_bqml_model
 
     with mock.patch.object(core.BqmlModel, "generate_text_tvf", generate_text_tvf):
@@ -448,13 +449,7 @@ def test_text_generator_retry_success(
         ),
     ],
 )
-def test_text_generator_retry_no_progress(
-    session,
-    model_class,
-    options,
-    bqml_gemini_text_generator: llm.GeminiTextGenerator,
-    bqml_claude3_text_generator: llm.Claude3TextGenerator,
-):
+def test_text_generator_retry_no_progress(session, model_class, options, bq_connection):
     # Requests.
     df0 = EqCmpAllDataFrame(
         {
@@ -514,11 +509,7 @@ def test_text_generator_retry_no_progress(
         ),
     ]
 
-    text_generator_model = (
-        bqml_gemini_text_generator
-        if (model_class == llm.GeminiTextGenerator)
-        else bqml_claude3_text_generator
-    )
+    text_generator_model = model_class(connection_name=bq_connection, session=session)
     text_generator_model._bqml_model = mock_bqml_model
 
     with mock.patch.object(core.BqmlModel, "generate_text_tvf", generate_text_tvf):
diff --git a/tests/system/small/ml/test_multimodal_llm.py b/tests/system/small/ml/test_multimodal_llm.py
@@ -12,6 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import pandas as pd
+import pyarrow as pa
 import pytest
 
 import bigframes
@@ -68,3 +70,55 @@ def test_gemini_text_generator_multimodal_input(
         index=2,
         col_exact=False,
     )
+
+
+@pytest.mark.parametrize(
+    "model_name",
+    (
+        "gemini-1.5-pro-001",
+        # "gemini-1.5-pro-002",
+        "gemini-1.5-flash-001",
+        "gemini-1.5-flash-002",
+        "gemini-2.0-flash-exp",
+        "gemini-2.0-flash-001",
+    ),
+)
+@pytest.mark.flaky(retries=2)
+def test_gemini_text_generator_multimodal_structured_output(
+    images_mm_df: bpd.DataFrame, model_name, test_session, bq_connection
+):
+    bigframes.options.experiments.blob = True
+
+    gemini_text_generator_model = llm.GeminiTextGenerator(
+        model_name=model_name, connection_name=bq_connection, session=test_session
+    )
+    output_schema = {
+        "bool_output": "bool",
+        "int_output": "int64",
+        "float_output": "float64",
+        "str_output": "string",
+        "array_output": "array<int64>",
+        "struct_output": "struct<number int64>",
+    }
+    df = gemini_text_generator_model.predict(
+        images_mm_df,
+        prompt=["Describe", images_mm_df["blob_col"]],
+        output_schema=output_schema,
+    )
+    assert df["bool_output"].dtype == pd.BooleanDtype()
+    assert df["int_output"].dtype == pd.Int64Dtype()
+    assert df["float_output"].dtype == pd.Float64Dtype()
+    assert df["str_output"].dtype == pd.StringDtype(storage="pyarrow")
+    assert df["array_output"].dtype == pd.ArrowDtype(pa.list_(pa.int64()))
+    assert df["struct_output"].dtype == pd.ArrowDtype(
+        pa.struct([("number", pa.int64())])
+    )
+
+    pd_df = df.to_pandas()
+    utils.check_pandas_df_schema_and_index(
+        pd_df,
+        columns=list(output_schema.keys())
+        + ["blob_col", "prompt", "full_response", "status"],
+        index=2,
+        col_exact=False,
+    )