# Patch summary (leading commentary; everything before the first file
# header below is not part of the change itself).
#
# google/cloud/bigquery/_pandas_helpers.py, dataframe_to_bq_schema():
#   * When `bq_schema_unused` is non-empty, the function no longer raises
#     ValueError. It emits a UserWarning with the same message text and then
#     appends the entry found in `bq_schema_index` for every unused field
#     name to `bq_schema_out`.
#   * The docstring gains a "Note:" describing that behaviour.
#
# tests/unit/test__pandas_helpers.py:
#   * Adds test_dataframe_to_bq_schema_allows_extra_fields, which passes a
#     schema with one extra field ("extra_column"), expects the UserWarning,
#     and expects the extra field to come last in the returned schema.
#
# NOTE(review): callers that relied on the ValueError for a schema/dataframe
#   mismatch will now only see a warning - confirm this is intended.
# NOTE(review): the appended fields follow the iteration order of
#   `bq_schema_unused`. Its type is not visible in this patch; if it is a
#   set, the order of several extra fields is not guaranteed. The new test
#   uses a single extra field, so it would not detect this - TODO confirm.
# NOTE(review): `bq_schema_index.get(...)` would append None for a name that
#   is missing from the index. Presumably every unused name comes from that
#   index, so this cannot happen - verify against the surrounding code.
# NOTE(review): line breaks, indentation and blank context lines below were
#   reconstructed from a whitespace-collapsed copy of this patch, guided by
#   the hunk line counts. Compare with the original before applying.
#
diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py
index 6691e7ef6..57b0eabc2 100644
--- a/google/cloud/bigquery/_pandas_helpers.py
+++ b/google/cloud/bigquery/_pandas_helpers.py
@@ -484,6 +484,10 @@ def dataframe_to_bq_schema(dataframe, bq_schema):
         Optional[Sequence[google.cloud.bigquery.schema.SchemaField]]:
             The automatically determined schema. Returns None if the type of
             any column cannot be determined.
+
+    Note:
+        - If `bq_schema` contains fields not found in the DataFrame, they will
+          still be included in the resulting schema, and a warning will be issued.
     """
     if pandas_gbq is None:
         warnings.warn(
@@ -543,11 +547,14 @@ def dataframe_to_bq_schema(dataframe, bq_schema):
     # Catch any schema mismatch. The developer explicitly asked to serialize a
     # column, but it was not found.
     if bq_schema_unused:
-        raise ValueError(
+        warnings.warn(
             "bq_schema contains fields not present in dataframe: {}".format(
                 bq_schema_unused
-            )
+            ),
+            category=UserWarning,
         )
+        for unused_field_name in bq_schema_unused:
+            bq_schema_out.append(bq_schema_index.get(unused_field_name))
 
     if unknown_type_columns != []:
         msg = "Could not determine the type of columns: {}".format(
diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py
index bc94f5f54..ba7f50695 100644
--- a/tests/unit/test__pandas_helpers.py
+++ b/tests/unit/test__pandas_helpers.py
@@ -1385,6 +1385,40 @@ def test_dataframe_to_bq_schema_w_bq_schema(module_under_test, monkeypatch):
     assert returned_schema == expected_schema
 
 
+@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
+def test_dataframe_to_bq_schema_allows_extra_fields(module_under_test, monkeypatch):
+    monkeypatch.setattr(module_under_test, "pandas_gbq", None)
+
+    df_data = collections.OrderedDict(
+        [
+            ("str_column", ["hello", "world"]),
+            ("int_column", [42, 8]),
+            ("bool_column", [True, False]),
+        ]
+    )
+    dataframe = pandas.DataFrame(df_data)
+
+    dict_schema = [
+        {"name": "str_column", "type": "STRING", "mode": "NULLABLE"},
+        {"name": "int_column", "type": "INTEGER", "mode": "NULLABLE"},
+        {"name": "bool_column", "type": "BOOL", "mode": "REQUIRED"},
+        {"name": "extra_column", "type": "STRING", "mode": "NULLABLE"},
+    ]
+
+    with pytest.warns(UserWarning, match="bq_schema contains fields not present"):
+        returned_schema = module_under_test.dataframe_to_bq_schema(
+            dataframe, dict_schema
+        )
+
+    expected_schema = (
+        schema.SchemaField("str_column", "STRING", "NULLABLE"),
+        schema.SchemaField("int_column", "INTEGER", "NULLABLE"),
+        schema.SchemaField("bool_column", "BOOL", "REQUIRED"),
+        schema.SchemaField("extra_column", "STRING", "NULLABLE"),
+    )
+    assert returned_schema == expected_schema
+
+
 @pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
 def test_dataframe_to_bq_schema_fallback_needed_wo_pyarrow(
     module_under_test, monkeypatch