4242import com .google .cloud .bigquery .storage .v1 .AppendRowsRequest ;
4343import com .google .cloud .bigquery .storage .v1 .CreateReadSessionRequest ;
4444import com .google .cloud .bigquery .storage .v1 .DataFormat ;
45+ import com .google .cloud .bigquery .storage .v1 .ProtoSchemaConverter ;
4546import com .google .cloud .bigquery .storage .v1 .ReadSession ;
4647import com .google .cloud .bigquery .storage .v1 .ReadStream ;
4748import com .google .gson .JsonArray ;
119120import org .apache .beam .sdk .transforms .PTransform ;
120121import org .apache .beam .sdk .transforms .ParDo ;
121122import org .apache .beam .sdk .transforms .Reshuffle ;
123+ import org .apache .beam .sdk .transforms .SerializableBiFunction ;
124+ import org .apache .beam .sdk .transforms .SerializableBiFunctions ;
122125import org .apache .beam .sdk .transforms .SerializableFunction ;
123126import org .apache .beam .sdk .transforms .SerializableFunctions ;
124127import org .apache .beam .sdk .transforms .SimpleFunction ;
@@ -2297,10 +2300,61 @@ public static <T extends Message> Write<T> writeProtos(Class<T> protoMessageClas
22972300 if (DynamicMessage .class .equals (protoMessageClass )) {
22982301 throw new IllegalArgumentException ("DynamicMessage is not supported." );
22992302 }
2300- return BigQueryIO .<T >write ()
2301- .withFormatFunction (
2302- m -> TableRowToStorageApiProto .tableRowFromMessage (m , false , Predicates .alwaysTrue ()))
2303- .withWriteProtosClass (protoMessageClass );
2303+ try {
2304+ return BigQueryIO .<T >write ()
2305+ .toBuilder ()
2306+ .setFormatFunction (FormatProto .fromClass (protoMessageClass ))
2307+ .build ()
2308+ .withWriteProtosClass (protoMessageClass );
2309+ } catch (Exception e ) {
2310+ throw new RuntimeException (e );
2311+ }
2312+ }
2313+
2314+ private static class FormatProto <T extends Message >
2315+ implements SerializableBiFunction <TableRowToStorageApiProto .SchemaInformation , T , TableRow > {
2316+ transient TableRowToStorageApiProto .SchemaInformation inferredSchemaInformation ;
2317+ final Class <T > protoMessageClass ;
2318+
2319+ FormatProto (Class <T > protoMessageClass ) {
2320+ this .protoMessageClass = protoMessageClass ;
2321+ }
2322+
2323+ TableRowToStorageApiProto .SchemaInformation inferSchemaInformation () {
2324+ try {
2325+ if (inferredSchemaInformation == null ) {
2326+ Descriptors .Descriptor descriptor =
2327+ (Descriptors .Descriptor )
2328+ org .apache .beam .sdk .util .Preconditions .checkStateNotNull (
2329+ protoMessageClass .getMethod ("getDescriptor" ))
2330+ .invoke (null );
2331+ Descriptors .Descriptor convertedDescriptor =
2332+ TableRowToStorageApiProto .wrapDescriptorProto (
2333+ ProtoSchemaConverter .convert (descriptor ).getProtoDescriptor ());
2334+ TableSchema tableSchema =
2335+ TableRowToStorageApiProto .protoSchemaToTableSchema (
2336+ TableRowToStorageApiProto .tableSchemaFromDescriptor (convertedDescriptor ));
2337+ this .inferredSchemaInformation =
2338+ TableRowToStorageApiProto .SchemaInformation .fromTableSchema (tableSchema );
2339+ }
2340+ return inferredSchemaInformation ;
2341+ } catch (Exception e ) {
2342+ throw new RuntimeException (e );
2343+ }
2344+ }
2345+
2346+ static <T extends Message > FormatProto <T > fromClass (Class <T > protoMessageClass )
2347+ throws Exception {
2348+ return new FormatProto <>(protoMessageClass );
2349+ }
2350+
2351+ @ Override
2352+ public TableRow apply (TableRowToStorageApiProto .SchemaInformation schemaInformation , T input ) {
2353+ TableRowToStorageApiProto .SchemaInformation localSchemaInformation =
2354+ schemaInformation != null ? schemaInformation : inferSchemaInformation ();
2355+ return TableRowToStorageApiProto .tableRowFromMessage (
2356+ localSchemaInformation , input , false , Predicates .alwaysTrue ());
2357+ }
23042358 }
23052359
23062360 /** Implementation of {@link #write}. */
@@ -2354,9 +2408,13 @@ public enum Method {
23542408 abstract @ Nullable SerializableFunction <ValueInSingleWindow <T >, TableDestination >
23552409 getTableFunction ();
23562410
2357- abstract @ Nullable SerializableFunction <T , TableRow > getFormatFunction ();
2411+ abstract @ Nullable SerializableBiFunction <
2412+ TableRowToStorageApiProto .SchemaInformation , T , TableRow >
2413+ getFormatFunction ();
23582414
2359- abstract @ Nullable SerializableFunction <T , TableRow > getFormatRecordOnFailureFunction ();
2415+ abstract @ Nullable SerializableBiFunction <
2416+ TableRowToStorageApiProto .SchemaInformation , T , TableRow >
2417+ getFormatRecordOnFailureFunction ();
23602418
23612419 abstract RowWriterFactory .@ Nullable AvroRowWriterFactory <T , ?, ?> getAvroRowWriterFactory ();
23622420
@@ -2467,10 +2525,13 @@ abstract static class Builder<T> {
24672525 abstract Builder <T > setTableFunction (
24682526 SerializableFunction <ValueInSingleWindow <T >, TableDestination > tableFunction );
24692527
2470- abstract Builder <T > setFormatFunction (SerializableFunction <T , TableRow > formatFunction );
2528+ abstract Builder <T > setFormatFunction (
2529+ SerializableBiFunction <TableRowToStorageApiProto .SchemaInformation , T , TableRow >
2530+ formatFunction );
24712531
24722532 abstract Builder <T > setFormatRecordOnFailureFunction (
2473- SerializableFunction <T , TableRow > formatFunction );
2533+ SerializableBiFunction <TableRowToStorageApiProto .SchemaInformation , T , TableRow >
2534+ formatFunction );
24742535
24752536 abstract Builder <T > setAvroRowWriterFactory (
24762537 RowWriterFactory .AvroRowWriterFactory <T , ?, ?> avroRowWriterFactory );
@@ -2718,7 +2779,9 @@ public Write<T> to(DynamicDestinations<T, ?> dynamicDestinations) {
27182779
27192780 /** Formats the user's type into a {@link TableRow} to be written to BigQuery. */
27202781 public Write <T > withFormatFunction (SerializableFunction <T , TableRow > formatFunction ) {
2721- return toBuilder ().setFormatFunction (formatFunction ).build ();
2782+ return toBuilder ()
2783+ .setFormatFunction (SerializableBiFunctions .ignore1st (formatFunction ))
2784+ .build ();
27222785 }
27232786
27242787 /**
@@ -2733,7 +2796,9 @@ public Write<T> withFormatFunction(SerializableFunction<T, TableRow> formatFunct
27332796 */
27342797 public Write <T > withFormatRecordOnFailureFunction (
27352798 SerializableFunction <T , TableRow > formatFunction ) {
2736- return toBuilder ().setFormatRecordOnFailureFunction (formatFunction ).build ();
2799+ return toBuilder ()
2800+ .setFormatRecordOnFailureFunction (SerializableBiFunctions .ignore1st (formatFunction ))
2801+ .build ();
27372802 }
27382803
27392804 /**
@@ -3599,9 +3664,10 @@ && getStorageApiTriggeringFrequency(bqOptions) != null) {
35993664 private <DestinationT > WriteResult expandTyped (
36003665 PCollection <T > input , DynamicDestinations <T , DestinationT > dynamicDestinations ) {
36013666 boolean optimizeWrites = getOptimizeWrites ();
3602- SerializableFunction <T , TableRow > formatFunction = getFormatFunction ();
3603- SerializableFunction <T , TableRow > formatRecordOnFailureFunction =
3604- getFormatRecordOnFailureFunction ();
3667+ SerializableBiFunction <TableRowToStorageApiProto .SchemaInformation , T , TableRow >
3668+ formatFunction = getFormatFunction ();
3669+ SerializableBiFunction <TableRowToStorageApiProto .SchemaInformation , T , TableRow >
3670+ formatRecordOnFailureFunction = getFormatRecordOnFailureFunction ();
36053671 RowWriterFactory .AvroRowWriterFactory <T , ?, DestinationT > avroRowWriterFactory =
36063672 (RowWriterFactory .AvroRowWriterFactory <T , ?, DestinationT >) getAvroRowWriterFactory ();
36073673
@@ -3623,7 +3689,8 @@ private <DestinationT> WriteResult expandTyped(
36233689 // If no format function set, then we will automatically convert the input type to a
36243690 // TableRow.
36253691 // TODO: it would be trivial to convert to avro records here instead.
3626- formatFunction = BigQueryUtils .toTableRow (input .getToRowFunction ());
3692+ formatFunction =
3693+ SerializableBiFunctions .ignore1st (BigQueryUtils .toTableRow (input .getToRowFunction ()));
36273694 }
36283695 // Infer the TableSchema from the input Beam schema.
36293696 // TODO: If the user provided a schema, we should use that. There are things that can be
@@ -3769,8 +3836,9 @@ private <DestinationT> WriteResult continueExpandTyped(
37693836 getCreateDisposition (),
37703837 dynamicDestinations ,
37713838 elementCoder ,
3772- tableRowWriterFactory .getToRowFn (),
3773- tableRowWriterFactory .getToFailsafeRowFn ())
3839+ SerializableBiFunctions .fix1st (tableRowWriterFactory .getToRowFn (), null ),
3840+ SerializableBiFunctions .fix1st (
3841+ tableRowWriterFactory .getToFailsafeRowFn (), null ))
37743842 .withInsertRetryPolicy (retryPolicy )
37753843 .withTestServices (getBigQueryServices ())
37763844 .withExtendedErrorInfo (getExtendedErrorInfo ())
0 commit comments