* add bigquery options
* update some language
* update pipeline and parameters based on option files
* update table parameter language
* update default for numStreams
* update kafka option to fix spotless error and template
* forgot to update IT :)
* fix spotless and correct parameter
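The "option files" mentioned above are the YAML files that document each template parameter; the last hunk in this commit edits one of them. As a rough sketch of the renamed Kafka parameter in that format (assuming each entry also carries a name and a type; only the description and help keys are actually visible in that hunk):

    # Hypothetical option-file entry for the renamed parameter. Only the
    # description/help keys are confirmed by the hunk below; name/type are assumed.
    - name: bootstrapServers
      type: string
      description: "A list of host/port pairs to use for establishing the initial connection to the Kafka cluster."
      help: >
        A list of host/port pairs to use for establishing the initial connection
        to the Kafka cluster. For example: host1:port1,host2:port2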
@@ -45,85 +45,196 @@ public interface KafkaToBigQueryYaml {
 
   @TemplateParameter.Text(
       order = 1,
-      name = "readBootstrapServers",
+      name = "bootstrapServers",
       optional = false,
-      description = "Kafka Bootstrap Server list",
-      helpText = "Kafka Bootstrap Server list, separated by commas.",
-      example = "localhost:9092,127.0.0.1:9093")
+      description =
+          "A list of host/port pairs to use for establishing the initial connection to the Kafka cluster.",
+      helpText =
+          "A list of host/port pairs to use for establishing the initial connection to the Kafka cluster. For example: host1:port1,host2:port2",
+      example = "host1:port1,host2:port2,localhost:9092,127.0.0.1:9093")
   @Validation.Required
-  String getReadBootstrapServers();
+  String getBootstrapServers();
 
   @TemplateParameter.Text(
       order = 2,
-      name = "kafkaReadTopics",
+      name = "topic",
       optional = false,
-      description = "Kafka topic(s) to read the input from.",
-      helpText = "Kafka topic(s) to read the input from.",
-      example = "topic1,topic2")
+      description = "Kafka topic to read from.",
+      helpText = "Kafka topic to read from. For example: my_topic",
+      example = "my_topic")
   @Validation.Required
-  String getKafkaReadTopics();
+  String getTopic();
 
-  @TemplateParameter.Text(
+  @TemplateParameter.Boolean(
       order = 3,
-      name = "outputTableSpec",
-      optional = false,
-      description = "BigQuery output table",
-      helpText =
-          "BigQuery table location to write the output to. The name should be in the format <project>:<dataset>.<table_name>`. The table's schema must match input objects.",
-      example = "")
-  @Validation.Required
-  String getOutputTableSpec();
+      name = "allowDuplicates",
+      optional = true,
+      description = "If the Kafka read allows duplicates.",
+      helpText = "If the Kafka read allows duplicates. For example: true",
+      example = "true")
+  Boolean getAllowDuplicates();
 
   @TemplateParameter.Text(
       order = 4,
-      name = "outputDeadletterTable",
-      optional = false,
-      description = "The dead-letter table name to output failed messages to BigQuery",
-      helpText =
-          "BigQuery table for failed messages. Messages failed to reach the output table for different reasons (e.g., mismatched schema, malformed json) are written to this table. If it doesn't exist, it will be created during pipeline execution. If not specified, 'outputTableSpec_error_records' is used instead. The dead-letter table name to output failed messages to BigQuery.",
-      example = "your-project-id:your-dataset.your-table-name")
-  @Validation.Required
-  String getOutputDeadletterTable();
+      name = "confluentSchemaRegistrySubject",
+      optional = true,
+      description = "The subject name for the Confluent Schema Registry.",
+      helpText = "The subject name for the Confluent Schema Registry. For example: my_subject",
+      example = "my_subject")
+  String getConfluentSchemaRegistrySubject();
 
   @TemplateParameter.Text(
       order = 5,
-      name = "messageFormat",
+      name = "confluentSchemaRegistryUrl",
       optional = true,
-      description = "The message format.",
-      helpText = "The message format. One of: AVRO, JSON, PROTO, RAW, or STRING.",
-      example = "")
-  @Default.String("JSON")
-  String getMessageFormat();
+      description = "The URL for the Confluent Schema Registry.",
+      helpText =
+          "The URL for the Confluent Schema Registry. For example: http://schema-registry:8081",
+      example = "http://schema-registry:8081")
+  String getConfluentSchemaRegistryUrl();
 
   @TemplateParameter.Text(
       order = 6,
+      name = "consumerConfigUpdates",
+      optional = true,
+      description =
+          "A list of key-value pairs that act as configuration parameters for Kafka consumers.",
+      helpText =
+          "A list of key-value pairs that act as configuration parameters for Kafka consumers. For example: {'group.id': 'my_group'}",
+      example = "{\"group.id\": \"my_group\"}")
+  String getConsumerConfigUpdates();
+
+  @TemplateParameter.Text(
+      order = 7,
+      name = "fileDescriptorPath",
+      optional = true,
+      description = "The path to the Protocol Buffer File Descriptor Set file.",
+      helpText =
+          "The path to the Protocol Buffer File Descriptor Set file. For example: gs://bucket/path/to/descriptor.pb",
+      example = "gs://bucket/path/to/descriptor.pb")
+  String getFileDescriptorPath();
+
+  @TemplateParameter.Text(
+      order = 8,
+      name = "format",
+      optional = true,
+      description = "The encoding format for the data stored in Kafka.",
+      helpText =
+          "The encoding format for the data stored in Kafka. Valid options are: RAW,STRING,AVRO,JSON,PROTO. For example: JSON",
+      example = "JSON")
+  @Default.String("JSON")
+  String getFormat();
+
+  @TemplateParameter.Text(
+      order = 9,
+      name = "messageName",
+      optional = true,
+      description =
+          "The name of the Protocol Buffer message to be used for schema extraction and data conversion.",
+      helpText =
+          "The name of the Protocol Buffer message to be used for schema extraction and data conversion. For example: MyMessage",
+      example = "MyMessage")
+  String getMessageName();
+
+  @TemplateParameter.Boolean(
+      order = 10,
+      name = "offsetDeduplication",
+      optional = true,
+      description = "If the redistribute is using offset deduplication mode.",
+      helpText = "If the redistribute is using offset deduplication mode. For example: true",
+      example = "true")
+  Boolean getOffsetDeduplication();
+
+  @TemplateParameter.Boolean(
+      order = 11,
+      name = "redistributeByRecordKey",
+      optional = true,
+      description = "If the redistribute keys by the Kafka record key.",
+      helpText = "If the redistribute keys by the Kafka record key. For example: true",
+      example = "true")
+  Boolean getRedistributeByRecordKey();
+
+  @TemplateParameter.Integer(
+      order = 12,
+      name = "redistributeNumKeys",
+      optional = true,
+      description = "The number of keys for redistributing Kafka inputs.",
+      helpText = "The number of keys for redistributing Kafka inputs. For example: 10",
+      example = "10")
+  Integer getRedistributeNumKeys();
+
+  @TemplateParameter.Boolean(
+      order = 13,
+      name = "redistributed",
+      optional = true,
+      description = "If the Kafka read should be redistributed.",
+      helpText = "If the Kafka read should be redistributed. For example: true",
+      example = "true")
+  Boolean getRedistributed();
+
+  @TemplateParameter.Text(
+      order = 14,
       name = "schema",
-      helpText = "Kafka schema. A schema is required if data format is JSON, AVRO or PROTO.",
+      optional = true,
+      description = "The schema in which the data is encoded in the Kafka topic.",
+      helpText =
+          "The schema in which the data is encoded in the Kafka topic. For example: {'type': 'record', 'name': 'User', 'fields': [{'name': 'name', 'type': 'string'}]}. A schema is required if data format is JSON, AVRO or PROTO.",
[...]
+      description = "BigQuery table",
+      helpText =
+          "BigQuery table location to write the output to or read from. The name should be in the format <project>:<dataset>.<table_name>`. For write, the table's schema must match input objects.",
       example = "")
   @Validation.Required
-  String getSchema();
+  String getTable();
+
+  @TemplateParameter.Text(
+      order = 16,
+      name = "createDisposition",
+      optional = true,
+      description = "How to create",
+      helpText =
+          "Specifies whether a table should be created if it does not exist. Valid inputs are 'Never' and 'IfNeeded'.",
+      example = "")
+  @Default.String("CREATE_IF_NEEDED")
+  String getCreateDisposition();
+
+  @TemplateParameter.Text(
+      order = 17,
+      name = "writeDisposition",
+      optional = true,
+      description = "How to write",
+      helpText =
+          "How to specify if a write should append to an existing table, replace the table, or verify that the table is empty. Note that the my_dataset being written to must already exist. Unbounded collections can only be written using 'WRITE_EMPTY' or 'WRITE_APPEND'.",
+      example = "")
+  @Default.String("WRITE_APPEND")
+  String getWriteDisposition();
 
   @TemplateParameter.Integer(
-      order = 7,
-      name = "numStorageWriteApiStreams",
+      order = 18,
+      name = "numStreams",
       optional = true,
       description = "Number of streams for BigQuery Storage Write API",
       helpText =
           "Number of streams defines the parallelism of the BigQueryIO’s Write transform and roughly corresponds to the number of Storage Write API’s streams which will be used by the pipeline. See https://cloud.google.com/blog/products/data-analytics/streaming-data-into-bigquery-using-storage-write-api for the recommended values. The default value is 1.",
       example = "")
   @Default.Integer(1)
-  Integer getNumStorageWriteApiStreams();
+  Integer getNumStreams();
 
-  @TemplateParameter.Integer(
-      order = 8,
-      name = "storageWriteApiTriggeringFrequencySec",
-      optional = true,
-      description = "Triggering frequency in seconds for BigQuery Storage Write API",
+  @TemplateParameter.Text(
+      order = 19,
+      name = "outputDeadletterTable",
+      optional = false,
+      description = "The dead-letter table name to output failed messages to BigQuery",
       helpText =
-          "Triggering frequency will determine how soon the data will be visible for querying in BigQuery. See https://cloud.google.com/blog/products/data-analytics/streaming-data-into-bigquery-using-storage-write-api for the recommended values. The default value is 5.",
+          "BigQuery table for failed messages. Messages failed to reach the output table for different reasons (e.g., mismatched schema, malformed json) are written to this table. If it doesn't exist, it will be created during pipeline execution. If not specified, 'outputTableSpec_error_records' is used instead. The dead-letter table name to output failed messages to BigQuery.",
+      example = "your-project-id:your-dataset.your-table-name")
description: "The schema in which the data is encoded in the Kafka topic."
99
-
help: "The schema in which the data is encoded in the Kafka topic. For example: {'type': 'record', 'name': 'User', 'fields': [{'name': 'name', 'type': 'string'}]}"
99
+
help: >
100
+
The schema in which the data is encoded in the Kafka topic.
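Taken together, a launch of the updated template would supply the renamed parameters rather than the old readBootstrapServers/kafkaReadTopics/outputTableSpec names. A hypothetical parameter set, written as a YAML map with placeholder values taken from the examples and defaults above:

    # Hypothetical launch parameters; names come from the interface above,
    # values are the documented examples/defaults, not real endpoints.
    bootstrapServers: "host1:port1,host2:port2"
    topic: "my_topic"
    format: "JSON"
    schema: "{'type': 'record', 'name': 'User', 'fields': [{'name': 'name', 'type': 'string'}]}"
    table: "my-project:my_dataset.my_table"
    createDisposition: "CREATE_IF_NEEDED"
    writeDisposition: "WRITE_APPEND"
    numStreams: 1
    outputDeadletterTable: "my-project:my_dataset.my_table_error_records"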