@@ -906,15 +906,53 @@ def test_df_to_pandas_batches(scalars_dfs):
906906 assert_pandas_df_equal (pd .concat (filtered_batches ), pd_result )
907907
908908
@pytest.mark.parametrize(
    ("literal", "expected_dtype"),
    (
        pytest.param(
            2,
            dtypes.INT_DTYPE,
            id="INT64",
        ),
        # ====================================================================
        # NULL values
        #
        # These are regression tests for b/428999884. It needs to be possible to
        # set a column to NULL with a desired type (not just the pandas default
        # of float64).
        # ====================================================================
        pytest.param(None, dtypes.FLOAT_DTYPE, id="NULL-None"),
        pytest.param(
            pa.scalar(None, type=pa.int64()),
            dtypes.INT_DTYPE,
            # Each case needs its own id so failures and `-k` selection point
            # at the right parameter set.
            id="NULL-pyarrow-INT64",
        ),
        pytest.param(
            pa.scalar(None, type=pa.timestamp("us", tz="UTC")),
            dtypes.TIMESTAMP_DTYPE,
            id="NULL-pyarrow-TIMESTAMP",
        ),
        pytest.param(
            pa.scalar(None, type=pa.timestamp("us")),
            dtypes.DATETIME_DTYPE,
            id="NULL-pyarrow-DATETIME",
        ),
    ),
)
def test_assign_new_column_w_literal(scalars_dfs, literal, expected_dtype):
    """Check that assigning a scalar literal as a new column matches pandas.

    Args:
        scalars_dfs: Fixture yielding a pair of (DataFrame under test,
            equivalent pandas DataFrame).
        literal: The scalar assigned to ``new_col``; either a plain Python
            value or a (possibly typed-NULL) ``pyarrow.Scalar``.
        expected_dtype: The dtype the resulting ``new_col`` is expected to
            have; the pandas reference column is cast to it before comparing.
    """
    scalars_df, scalars_pandas_df = scalars_dfs
    df = scalars_df.assign(new_col=literal)
    bf_result = df.to_pandas()

    new_col_pd = literal
    if isinstance(literal, pa.Scalar):
        # PyArrow integer scalars aren't yet supported in pandas Int64Dtype,
        # so hand pandas the plain Python value instead.
        new_col_pd = literal.as_py()

    # Pandas might not pick the same dtype as BigFrames, but it should at least
    # be castable to it.
    pd_result = scalars_pandas_df.assign(new_col=new_col_pd)
    pd_result["new_col"] = pd_result["new_col"].astype(expected_dtype)

    assert_pandas_df_equal(bf_result, pd_result)
920958
0 commit comments