@@ -1126,32 +1126,57 @@ def test_read_pickle_gcs(session, penguins_pandas_df_default_index, gcs_folder):
1126
1126
1127
1127
1128
1128
@pytest .mark .parametrize (
1129
- ("engine" ,),
1129
+ ("engine" , "filename" ),
1130
1130
(
1131
- ("auto" ,),
1132
- ("bigquery" ,),
1131
+ pytest .param (
1132
+ "auto" ,
1133
+ "000000000000.parquet" ,
1134
+ id = "auto" ,
1135
+ ),
1136
+ pytest .param (
1137
+ "pyarrow" ,
1138
+ "000000000000.parquet" ,
1139
+ id = "pyarrow" ,
1140
+ ),
1141
+ pytest .param (
1142
+ "bigquery" ,
1143
+ "000000000000.parquet" ,
1144
+ id = "bigquery" ,
1145
+ ),
1146
+ pytest .param (
1147
+ "bigquery" ,
1148
+ "*.parquet" ,
1149
+ id = "bigquery_wildcard" ,
1150
+ ),
1151
+ pytest .param (
1152
+ "auto" ,
1153
+ "*.parquet" ,
1154
+ id = "auto_wildcard" ,
1155
+ marks = pytest .mark .xfail (
1156
+ raises = ValueError ,
1157
+ ),
1158
+ ),
1133
1159
),
1134
1160
)
1135
- def test_read_parquet_gcs (session : bigframes .Session , scalars_dfs , gcs_folder , engine ):
1161
+ def test_read_parquet_gcs (
1162
+ session : bigframes .Session , scalars_dfs , gcs_folder , engine , filename
1163
+ ):
1136
1164
scalars_df , _ = scalars_dfs
1137
1165
# Include wildcard so that multiple files can be written/read if > 1 GB.
1138
1166
# https://cloud.google.com/bigquery/docs/exporting-data#exporting_data_into_one_or_more_files
1139
- path = gcs_folder + test_read_parquet_gcs .__name__ + "*.parquet"
1167
+ write_path = gcs_folder + test_read_parquet_gcs .__name__ + "*.parquet"
1168
+ read_path = gcs_folder + test_read_parquet_gcs .__name__ + filename
1140
1169
1141
1170
df_in : bigframes .dataframe .DataFrame = scalars_df .copy ()
1142
1171
# GEOGRAPHY not supported in parquet export.
1143
1172
df_in = df_in .drop (columns = "geography_col" )
1144
1173
# Make sure we can also serialize the order.
1145
1174
df_write = df_in .reset_index (drop = False )
1146
1175
df_write .index .name = f"ordering_id_{ random .randrange (1_000_000 )} "
1147
- df_write .to_parquet (path , index = True )
1148
-
1149
- # Only bigquery engine for reads supports wildcards in path name.
1150
- if engine != "bigquery" :
1151
- path = utils .get_first_file_from_wildcard (path )
1176
+ df_write .to_parquet (write_path , index = True )
1152
1177
1153
1178
df_out = (
1154
- session .read_parquet (path , engine = engine )
1179
+ session .read_parquet (read_path , engine = engine )
1155
1180
# Restore order.
1156
1181
.set_index (df_write .index .name ).sort_index ()
1157
1182
# Restore index.
0 commit comments