googleapis · lkhagvadorj-amp · Apr 17, 2025 · May 20, 2025
@@ -484,6 +484,10 @@ def dataframe_to_bq_schema(dataframe, bq_schema):
         Optional[Sequence[google.cloud.bigquery.schema.SchemaField]]:
             The automatically determined schema. Returns None if the type of
             any column cannot be determined.
+
+    Note:
+        Fields in `bq_schema` that are not found in the DataFrame are still
+        appended to the resulting schema, and a `UserWarning` is issued.
     """
     if pandas_gbq is None:
         warnings.warn(
@@ -543,11 +547,14 @@ def dataframe_to_bq_schema(dataframe, bq_schema):
     # Catch any schema mismatch. The developer explicitly asked to serialize a
     # column, but it was not found.
     if bq_schema_unused:
-        raise ValueError(
+        warnings.warn(
             "bq_schema contains fields not present in dataframe: {}".format(
                 bq_schema_unused
-            )
+            ),
+            category=UserWarning,
         )
+        for unused_field_name in bq_schema_unused:
+            bq_schema_out.append(bq_schema_index.get(unused_field_name))
 
     if unknown_type_columns != []:
         msg = "Could not determine the type of columns: {}".format(

@@ -1385,6 +1385,40 @@ def test_dataframe_to_bq_schema_w_bq_schema(module_under_test, monkeypatch):
     assert returned_schema == expected_schema
 
 
+@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
+def test_dataframe_to_bq_schema_allows_extra_fields(module_under_test, monkeypatch):
+    monkeypatch.setattr(module_under_test, "pandas_gbq", None)  # pandas_gbq seen as missing
+
+    df_data = collections.OrderedDict(
+        [
+            ("str_column", ["hello", "world"]),
+            ("int_column", [42, 8]),
+            ("bool_column", [True, False]),
+        ]
+    )
+    dataframe = pandas.DataFrame(df_data)
+
+    dict_schema = [  # last entry has no matching DataFrame column
+        {"name": "str_column", "type": "STRING", "mode": "NULLABLE"},
+        {"name": "int_column", "type": "INTEGER", "mode": "NULLABLE"},
+        {"name": "bool_column", "type": "BOOL", "mode": "REQUIRED"},
+        {"name": "extra_column", "type": "STRING", "mode": "NULLABLE"},
+    ]
+
+    with pytest.warns(UserWarning, match="bq_schema contains fields not present"):
+        returned_schema = module_under_test.dataframe_to_bq_schema(
+            dataframe, dict_schema
+        )  # the call must emit the warning matched above
+
+    expected_schema = (  # extra field is kept, listed after the DataFrame columns
+        schema.SchemaField("str_column", "STRING", "NULLABLE"),
+        schema.SchemaField("int_column", "INTEGER", "NULLABLE"),
+        schema.SchemaField("bool_column", "BOOL", "REQUIRED"),
+        schema.SchemaField("extra_column", "STRING", "NULLABLE"),
+    )
+    assert returned_schema == expected_schema
+
+
 @pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
 def test_dataframe_to_bq_schema_fallback_needed_wo_pyarrow(
     module_under_test, monkeypatch