Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions google/cloud/bigquery/_pandas_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -484,6 +484,10 @@ def dataframe_to_bq_schema(dataframe, bq_schema):
Optional[Sequence[google.cloud.bigquery.schema.SchemaField]]:
The automatically determined schema. Returns None if the type of
any column cannot be determined.

Note:
- If `bq_schema` contains fields not found in the DataFrame, they will
still be included in the resulting schema, and a warning will be issued.
"""
if pandas_gbq is None:
warnings.warn(
Expand Down Expand Up @@ -543,11 +547,14 @@ def dataframe_to_bq_schema(dataframe, bq_schema):
# Flag any schema mismatch. The developer explicitly asked to serialize a
# column, but it was not found in the dataframe. Warn (rather than fail) and
# keep the unmatched fields in the returned schema.
if bq_schema_unused:
raise ValueError(
warnings.warn(
"bq_schema contains fields not present in dataframe: {}".format(
bq_schema_unused
)
),
category=UserWarning,
)
for unused_field_name in bq_schema_unused:
bq_schema_out.append(bq_schema_index.get(unused_field_name))

if unknown_type_columns != []:
msg = "Could not determine the type of columns: {}".format(
Expand Down
34 changes: 34 additions & 0 deletions tests/unit/test__pandas_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1385,6 +1385,40 @@ def test_dataframe_to_bq_schema_w_bq_schema(module_under_test, monkeypatch):
assert returned_schema == expected_schema


@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
def test_dataframe_to_bq_schema_allows_extra_fields(module_under_test, monkeypatch):
    """Extra ``bq_schema`` fields trigger a warning and are kept in the result.

    The supplied schema names a column (``extra_column``) that the dataframe
    does not have. The call is expected to emit a ``UserWarning`` and return
    the dataframe's fields followed by the extra field.
    """
    # Exercise the code path used when pandas_gbq is not available.
    monkeypatch.setattr(module_under_test, "pandas_gbq", None)

    # Schema requested by the caller; the last entry has no dataframe column.
    requested_schema = [
        {"name": "str_column", "type": "STRING", "mode": "NULLABLE"},
        {"name": "int_column", "type": "INTEGER", "mode": "NULLABLE"},
        {"name": "bool_column", "type": "BOOL", "mode": "REQUIRED"},
        {"name": "extra_column", "type": "STRING", "mode": "NULLABLE"},
    ]

    # Build the column mapping one key at a time so column order is explicit.
    columns = collections.OrderedDict()
    columns["str_column"] = ["hello", "world"]
    columns["int_column"] = [42, 8]
    columns["bool_column"] = [True, False]
    frame = pandas.DataFrame(columns)

    warning_check = pytest.warns(
        UserWarning, match="bq_schema contains fields not present"
    )
    with warning_check:
        actual = module_under_test.dataframe_to_bq_schema(frame, requested_schema)

    # (name, type, mode) triples in the order the fields should come back.
    expected_specs = [
        ("str_column", "STRING", "NULLABLE"),
        ("int_column", "INTEGER", "NULLABLE"),
        ("bool_column", "BOOL", "REQUIRED"),
        ("extra_column", "STRING", "NULLABLE"),
    ]
    expected = tuple(schema.SchemaField(*spec) for spec in expected_specs)
    assert actual == expected


@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
def test_dataframe_to_bq_schema_fallback_needed_wo_pyarrow(
module_under_test, monkeypatch
Expand Down