@@ -2,6 +2,7 @@
 
 import com.clickhouse.client.api.metadata.TableSchema;
 import com.clickhouse.data.ClickHouseFormat;
+import org.apache.flink.api.common.JobExecutionResult;
 import org.apache.flink.api.common.eventtime.WatermarkStrategy;
 import org.apache.flink.api.common.functions.MapFunction;
 import org.apache.flink.connector.base.sink.writer.ElementConverter;
@@ -31,6 +32,7 @@
 public class ClickHouseSinkTests extends FlinkClusterTests {
 
     static final int EXPECTED_ROWS = 10000;
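+    // expected count when every record fails to insert and is dropped (see CSVDataOnFailureDropDataTest)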
+    static final int EXPECTED_ROWS_ON_FAILURE = 0;
     static final int MAX_BATCH_SIZE = 5000;
     static final int MAX_IN_FLIGHT_REQUESTS = 2;
     static final int MAX_BUFFERED_REQUESTS = 20000;
@@ -40,7 +42,7 @@ public class ClickHouseSinkTests extends FlinkClusterTests {
 
     static final int STREAM_PARALLELISM = 5;
 
-    private int executeJob(StreamExecutionEnvironment env, String tableName) throws Exception {
+    private int executeAsyncJob(StreamExecutionEnvironment env, String tableName) throws Exception {
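+        // kick off the job without blocking, rather than waiting on env.execute()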
         JobClient jobClient = env.executeAsync("Read GZipped CSV with FileSource");
         int rows = 0;
         int iterations = 0;
@@ -109,7 +111,7 @@ void CSVDataTest() throws Exception {
                 "GzipCsvSource"
         );
         lines.sinkTo(csvSink);
-        int rows = executeJob(env, tableName);
+        int rows = executeAsyncJob(env, tableName);
         Assertions.assertEquals(EXPECTED_ROWS, rows);
     }
 
@@ -178,7 +180,7 @@ public CovidPOJO map(String value) throws Exception {
         });
         // send to a sink
         covidPOJOs.sinkTo(covidPOJOSink);
-        int rows = executeJob(env, tableName);
+        int rows = executeAsyncJob(env, tableName);
         Assertions.assertEquals(EXPECTED_ROWS, rows);
     }
 
@@ -239,7 +241,65 @@ void SimplePOJODataTest() throws Exception {
         DataStream<SimplePOJO> simplePOJOs = env.fromData(simplePOJOList.toArray(new SimplePOJO[0]));
         // send to a sink
         simplePOJOs.sinkTo(simplePOJOSink);
-        int rows = executeJob(env, tableName);
+        int rows = executeAsyncJob(env, tableName);
         Assertions.assertEquals(EXPECTED_ROWS, rows);
     }
+
+    @Test
+    void CSVDataOnFailureDropDataTest() throws Exception {
+        String tableName = "csv_failure_covid";
+        String dropTable = String.format("DROP TABLE IF EXISTS `%s`.`%s`", getDatabase(), tableName);
+        ClickHouseServerForTests.executeSql(dropTable);
+        // create table
+        String tableSql = "CREATE TABLE `" + getDatabase() + "`.`" + tableName + "` (" +
+                "date Date," +
+                "location_key LowCardinality(String)," +
+                "new_confirmed Int32," +
+                "new_deceased Int32," +
+                "new_recovered Int32," +
+                "new_tested Int32," +
+                "cumulative_confirmed Int32," +
+                "cumulative_deceased Int32," +
+                "cumulative_recovered Int32," +
+                "cumulative_tested Int32" +
+                ") " +
+                "ENGINE = MergeTree " +
+                "ORDER BY (location_key, date); ";
+        ClickHouseServerForTests.executeSql(tableSql);
+
+        final StreamExecutionEnvironment env = EmbeddedFlinkClusterForTests.getMiniCluster().getTestStreamEnvironment();
+        env.setParallelism(STREAM_PARALLELISM);
+
+
+        ClickHouseClientConfig clickHouseClientConfig = new ClickHouseClientConfig(getServerURL(), getUsername(), getPassword(), getDatabase(), tableName);
+        ElementConverter<String, ClickHousePayload> convertorString = new ClickHouseConvertor<>(String.class);
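+        // the String convertor presumably forwards each raw line untouched, leaving parsing to the server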
+        // create sink
+        ClickHouseAsyncSink<String> csvSink = new ClickHouseAsyncSink<>(
+                convertorString,
+                MAX_BATCH_SIZE,
+                MAX_IN_FLIGHT_REQUESTS,
+                MAX_BUFFERED_REQUESTS,
+                MAX_BATCH_SIZE_IN_BYTES,
+                MAX_TIME_IN_BUFFER_MS,
+                MAX_RECORD_SIZE_IN_BYTES,
+                clickHouseClientConfig
+        );
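+        // the input file is CSV but the format is declared as TSV; the mismatch should make the
+        // server reject the inserts, exercising the drop-on-failure path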
+        csvSink.setClickHouseFormat(ClickHouseFormat.TSV);
+
+        Path filePath = new Path("./src/test/resources/epidemiology_top_10000.csv.gz");
+
+        FileSource<String> source = FileSource
+                .forRecordStreamFormat(new TextLineInputFormat(), filePath)
+                .build();
+        // read csv data from file
+        DataStreamSource<String> lines = env.fromSource(
+                source,
+                WatermarkStrategy.noWatermarks(),
+                "GzipCsvSource"
+        );
+        lines.sinkTo(csvSink);
+        // TODO: make the test smarter by checking the counter of numOfDroppedRecords equals EXPECTED_ROWS
+        int rows = executeAsyncJob(env, tableName);
+        Assertions.assertEquals(EXPECTED_ROWS_ON_FAILURE, rows);
+    }
 }