19
19
20
20
21
21
import com .google .cloud .bigquery .BigQuery ;
22
+ import com .google .cloud .bigquery .TableId ;
22
23
import com .google .cloud .bigquery .BigQueryError ;
23
24
import com .google .cloud .bigquery .BigQueryException ;
24
25
import com .google .cloud .bigquery .InsertAllRequest ;
37
38
import java .util .Map ;
38
39
39
40
/**
40
- * A {@link BigQueryWriter} capable of updating BigQuery table schemas.
41
+ * A {@link BigQueryWriter} capable of updating BigQuery table schemas and automatically creating tables that do not exist.
41
42
*/
42
43
public class AdaptiveBigQueryWriter extends BigQueryWriter {
43
44
private static final Logger logger = LoggerFactory .getLogger (AdaptiveBigQueryWriter .class );
44
45
45
- // The maximum number of retries we will attempt to write rows after updating a BQ table schema.
46
- private static final int AFTER_UPDATE_RETY_LIMIT = 5 ;
46
+ // The maximum number of retries we will attempt to write rows after creating a table or updating a BQ table schema.
47
+ private static final int RETRY_LIMIT = 5 ;
48
+ // Wait about 30 seconds between retries, since both creating a table and updating a schema can take 2-3 minutes to take effect.
49
+ private static final int RETRY_WAIT_TIME = 30000 ;
47
50
48
51
private final BigQuery bigQuery ;
49
52
private final SchemaManager schemaManager ;
53
+ private final boolean autoUpdateSchemas ;
54
+ private final boolean autoCreateTables ;
50
55
51
56
/**
52
57
* @param bigQuery Used to send write requests to BigQuery.
@@ -57,10 +62,14 @@ public class AdaptiveBigQueryWriter extends BigQueryWriter {
57
62
public AdaptiveBigQueryWriter(BigQuery bigQuery,
                              SchemaManager schemaManager,
                              int retry,
                              long retryWait,
                              boolean autoUpdateSchemas,
                              boolean autoCreateTables) {
  // Retry settings are passed through to the BigQueryWriter base class.
  super(retry, retryWait);

  // Feature switches consulted when a write fails.
  this.autoCreateTables = autoCreateTables;
  this.autoUpdateSchemas = autoUpdateSchemas;

  // Collaborators used to send writes and to create or update tables.
  this.schemaManager = schemaManager;
  this.bigQuery = bigQuery;
}
65
74
66
75
private boolean isTableMissingSchema (BigQueryException exception ) {
@@ -69,6 +78,12 @@ private boolean isTableMissingSchema(BigQueryException exception) {
69
78
return exception .getReason () != null && exception .getReason ().equalsIgnoreCase ("invalid" );
70
79
}
71
80
81
+ private boolean isTableNotExistedException (BigQueryException exception ) {
82
+ // If a table does not exist, it will raise a BigQueryException that the input is notFound
83
+ // Referring to Google Cloud Error Codes Doc: https://cloud.google.com/bigquery/docs/error-messages?hl=en
84
+ return exception .getCode () == 404 ;
85
+ }
86
+
72
87
/**
73
88
* Sends the request to BigQuery, then checks the response to see if any errors have occurred. If
74
89
* any have, and all errors can be blamed upon invalid columns in the rows sent, attempts to
@@ -86,21 +101,24 @@ public Map<Long, List<BigQueryError>> performWriteRequest(
86
101
try {
87
102
request = createInsertAllRequest (tableId , rows );
88
103
writeResponse = bigQuery .insertAll (request );
89
- // Should only perform one schema update attempt; may have to continue insert attempts due to
90
- // BigQuery schema updates taking up to two minutes to take effect
104
+ // Should only perform one schema update attempt.
91
105
if (writeResponse .hasErrors ()
92
- && onlyContainsInvalidSchemaErrors (writeResponse .getInsertErrors ())) {
106
+ && onlyContainsInvalidSchemaErrors (writeResponse .getInsertErrors ()) && autoUpdateSchemas ) {
93
107
attemptSchemaUpdate (tableId , topic );
94
108
}
95
109
} catch (BigQueryException exception ) {
96
- if (isTableMissingSchema (exception )) {
110
+ // Should only perform one table creation attempt.
111
+ if (isTableNotExistedException (exception ) && autoCreateTables && bigQuery .getTable (tableId .getBaseTableId ()) == null ) {
112
+ attemptTableCreate (tableId .getBaseTableId (), topic );
113
+ } else if (isTableMissingSchema (exception ) && autoUpdateSchemas ) {
97
114
attemptSchemaUpdate (tableId , topic );
98
115
} else {
99
116
throw exception ;
100
117
}
101
118
}
102
119
103
- // Schema update might be delayed, so multiple insertion attempts may be necessary
120
+ // Creating tables or updating table schemas in BigQuery can take 2-3 minutes to take effect,
121
+ // so multiple insertion attempts may be necessary.
104
122
int attemptCount = 0 ;
105
123
while (writeResponse == null || writeResponse .hasErrors ()) {
106
124
logger .trace ("insertion failed" );
@@ -117,10 +135,15 @@ && onlyContainsInvalidSchemaErrors(writeResponse.getInsertErrors())) {
117
135
return writeResponse .getInsertErrors ();
118
136
}
119
137
attemptCount ++;
120
- if (attemptCount >= AFTER_UPDATE_RETY_LIMIT ) {
138
+ if (attemptCount >= RETRY_LIMIT ) {
121
139
throw new BigQueryConnectException (
122
140
"Failed to write rows after BQ schema update within "
123
- + AFTER_UPDATE_RETY_LIMIT + " attempts for: " + tableId .getBaseTableId ());
141
+ + RETRY_LIMIT + " attempts for: " + tableId .getBaseTableId ());
142
+ }
143
+ try {
144
+ Thread .sleep (RETRY_WAIT_TIME );
145
+ } catch (InterruptedException e ) {
146
+ // no-op, we want to keep retrying the insert
124
147
}
125
148
}
126
149
logger .debug ("table insertion completed successfully" );
@@ -136,6 +159,16 @@ private void attemptSchemaUpdate(PartitionedTableId tableId, String topic) {
136
159
}
137
160
}
138
161
162
+ private void attemptTableCreate (TableId tableId , String topic ) {
163
+ try {
164
+ schemaManager .createTable (tableId , topic );
165
+ logger .info ("Table {} does not exist, auto-created table for topic {}" , tableId , topic );
166
+ } catch (BigQueryException exception ) {
167
+ throw new BigQueryConnectException (
168
+ "Failed to create table " + tableId , exception );
169
+ }
170
+ }
171
+
139
172
/*
140
173
* Currently, the only way to determine the cause of an insert all failure is by examining the map
141
174
* object returned by the insertErrors() method of an insert all response. The only way to
0 commit comments