diff --git a/.gitignore b/.gitignore
index 64577b615..ad8c22082 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,8 +1,10 @@
 bin/
 build/
+target/
 *.class
 *.jar
 *.tar
+*.zip
 .gradle
@@ -22,6 +24,6 @@ build/
 key.json
 test.conf
-kcbq-connector/src/integration-test/resources/test.properties
+kcbq-connector/src/test/resources/test.properties
 kcbq-connector/test/docker/connect/properties/
 kcbq-connector/out/
diff --git a/.travis.yml b/.travis.yml
deleted file mode 100644
index f1b4fbf59..000000000
--- a/.travis.yml
+++ /dev/null
@@ -1,32 +0,0 @@
-language: java
-sudo: true
-dist: trusty
-group: edge
-
-jdk:
-  - oraclejdk8
-  - openjdk8
-  - openjdk11
-
-matrix:
-  fast_finish: true
-
-script:
-  - ./gradlew test
-
-after_success:
-  - if [ -e ./gradlew ]; then ./gradlew jacocoTestReport; else gradle jacocoTestReport; fi
-  - bash <(curl -s https://codecov.io/bash)
-
-before_cache:
-  - rm -f $HOME/.gradle/caches/modules-2/modules-2.lock
-  - rm -fr $HOME/.gradle/caches/*/plugin-resolution/
-
-cache:
-  directories:
-    - $HOME/.gradle/caches/
-    - $HOME/.gradle/wrapper/
-
-notifications:
-  email:
-    - open-source@wepay.com
diff --git a/Jenkinsfile b/Jenkinsfile
new file mode 100644
index 000000000..a080d23d6
--- /dev/null
+++ b/Jenkinsfile
@@ -0,0 +1,25 @@
+#!/usr/bin/env groovy
+/*
+ * Copyright 2020 Confluent, Inc.
+ *
+ * This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+common {
+    slackChannel = '#connect-warn'
+    nodeLabel = 'docker-oraclejdk8'
+    publish = false
+    downStreamValidate = false
+}
diff --git a/README.md b/README.md
index 6bbfc4a9a..ca3603757 100644
--- a/README.md
+++ b/README.md
@@ -41,7 +41,7 @@ save the properties file.
 Once you get more familiar with the connector, you might want to revisit the `connector.properties`
 file and experiment with tweaking its settings.
 
-### Building and Extracting a Tarball
+### Building and Extracting a Confluent Hub Archive
 
 If you haven't already, move into the repository's top-level directory:
@@ -49,16 +49,16 @@ If you haven't already, move into the repository's top-level directory:
 
 ```bash
 $ cd /path/to/kafka-connect-bigquery/
 ```
 
-Begin by creating a tarball of the connector with the Confluent Schema Retriever included:
+Begin by creating a Confluent Hub archive of the connector with the Confluent Schema Retriever included:
 
 ```bash
-$ ./gradlew clean distTar
+$ mvn clean package -DskipTests
 ```
 
 And then extract its contents:
 
 ```bash
-$ mkdir -p bin/jar/ && tar -C bin/jar/ -xf kcbq-confluent/build/distributions/kcbq-confluent-*.tar
+$ mkdir -p bin/jar/ && cp kcbq-connector/target/components/packages/wepay-kafka-connect-bigquery-*/wepay-kafka-connect-bigquery-*/lib/*.jar bin/jar/
 ```
 
 ### Setting-Up Background Processes
@@ -196,7 +196,7 @@ cannot occupy more than one line** (this inconvenience is due to limitations in
 Console Producer, and may be addressed in future commits).
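As an illustration only (not part of the patch): the one-record-per-line constraint just described matters if test data is generated programmatically. A minimal sketch of a guard for it, using a hypothetical helper class, file path, and record that are not from this repository:

```java
// Hypothetical helper -- not from this repository -- that writes console-producer
// test data in the required one-JSON-record-per-line format.
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.List;

public class TestDataFileWriter {
  static void write(Path file, List<String> jsonRecords) throws IOException {
    for (String record : jsonRecords) {
      // The Avro console producer treats each line as one record, so an embedded
      // newline would silently split a record into two malformed ones.
      if (record.contains("\n") || record.contains("\r")) {
        throw new IllegalArgumentException("Record spans multiple lines: " + record);
      }
    }
    Files.write(file, jsonRecords);
  }

  public static void main(String[] args) throws IOException {
    // Illustrative record and path; real data must match the Avro schema of its topic.
    write(Paths.get("data.json"), Arrays.asList("{\"f1\": \"value1\"}"));
  }
}
```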
To specify data verification, add a new JUnit test to the file -`src/integration-test/java/com/wepay/kafka/connect/bigquery/it/BigQueryConnectorIntegrationTest.java`. +`src/test/java/com/wepay/kafka/connect/bigquery/it/BigQueryConnectorIntegrationTest.java`. Rows that are retrieved from BigQuery in the test are only returned as _Lists_ of _Objects_. The names of their columns are not tracked. Construct a _List_ of the _Objects_ that you expect to be stored in the test's BigQuery table, retrieve the actual _List_ of _Objects_ stored via a call to diff --git a/build.gradle b/build.gradle deleted file mode 100644 index 1652f5af4..000000000 --- a/build.gradle +++ /dev/null @@ -1,369 +0,0 @@ -plugins { - id "com.github.spotbugs" version "2.0.0" -} - -// BEGIN ALL PROJECTS // -allprojects { - apply plugin: 'java' - - sourceCompatibility = JavaVersion.VERSION_1_8 - targetCompatibility = JavaVersion.VERSION_1_8 -} - -def withoutKafka = { - exclude group: 'org.apache.kafka', module: 'connect-api' - exclude group: 'org.apache.kafka', module: 'connect-transforms' - exclude group: 'org.apache.kafka', module: 'kafka-clients' -} - -// END ALL PROJECTS - -project.ext { - apacheHttpClientVersion = '4.5.6' - avroVersion = '1.8.1' - debeziumVersion = '0.6.1' - googleCloudVersion = '1.79.0' - googleAuthVersion = '0.9.0' - googleCloudGsonVersion = '2.8.5' - ioConfluentVersion = '5.5.0' - junitVersion = '4.12' - kafkaVersion = '2.5.0' - mockitoVersion = '3.2.4' - slf4jVersion = '1.6.1' -} - -// BEGIN SUBPROJECTS // -subprojects { subproject -> - - apply plugin: 'maven' - apply plugin: 'signing' - apply plugin: 'checkstyle' - apply plugin: 'idea' - apply plugin: 'eclipse' - - jar.baseName = subproject.name - - [compileJava, compileTestJava].each { - it.options.compilerArgs << '-Xlint:unchecked' - } - - checkstyle { - configFile = file("${rootDir}/config/checkstyle/google_checks.xml") - toolVersion = '6.18' - } - - spotbugsMain { - reports { - xml.enabled = false - html.enabled = true - } - } - - task javadocJar(type: Jar) { - classifier = 'javadoc' - from javadoc - } - - task sourcesJar(type: Jar) { - classifier = 'sources' - from sourceSets.main.allSource - } - - signing { - sign configurations.archives - required { - gradle.taskGraph.hasTask('uploadArchives') - } - } - - uploadArchives { - repositories { - mavenDeployer { - beforeDeployment { - MavenDeployment deployment -> signing.signPom(deployment) - } - - repository(url: 'https://oss.sonatype.org/service/local/staging/deploy/maven2') { - authentication(userName: findProperty('ossrhUsername') ?: '', password: findProperty('ossrhPassword') ?: '') - } - - snapshotRepository(url: 'https://oss.sonatype.org/content/repositories/snapshots') { - authentication(userName: findProperty('ossrhUsername') ?: '', password: findProperty('ossrhPassword') ?: '') - } - - pom.project { - - licenses { - license { - name 'The Apache License, Version 2.0' - url 'http://www.apache.org/licenses/LICENSE-2.0.txt' - } - } - - scm { - connection 'scm:git:git://github.com/wepay/kafka-connect-bigquery.git' - developerConnection 'scm:git:ssh://github.com:wepay/kafka-connect-bigquery.git' - url 'https://github.com/wepay/kafka-connect-bigquery' - } - - developers { - developer { - id 'C0urante' - name 'Chris Egerton' - email 'fearthecellos@gmail.comw' - } - - developer { - id 'moirat' - name 'Moira Tagle' - email 'moirat@wepay.com' - } - } - } - } - } - } -} -// END SUBPROJECTS - -// BEGIN INDIVIDUAL PROJECTS -project(':kcbq-connector') { - apply plugin: 'jacoco' - - jar { - manifest { - 
attributes 'Implementation-Title': 'Kafka Connect BigQuery Connector', - 'Implementation-Version': version - - } - } - - repositories { - mavenCentral() - } - - sourceSets { - integrationTest { - java { - compileClasspath += main.output - runtimeClasspath += main.output - srcDir file('src/integration-test/java') - } - resources.srcDir file('src/integration-test/resources') - } - } - - task integrationTestPrep() { - dependsOn 'integrationTestTablePrep' - dependsOn 'integrationTestBucketPrep' - } - - task integrationTestTablePrep(type: JavaExec) { - main = 'com.wepay.kafka.connect.bigquery.it.utils.TableClearer' - classpath = sourceSets.integrationTest.runtimeClasspath - args findProperty('kcbq_test_keyfile') ?: '' - args findProperty('kcbq_test_project') ?: '' - args findProperty('kcbq_test_dataset') ?: '' - args findProperty('kcbq_test_keysource') ?: '' - if (findProperty('kcbq_test_tables') != null) - args findProperty('kcbq_test_tables').split(' ') - } - - task integrationTestBucketPrep(type: JavaExec) { - main = 'com.wepay.kafka.connect.bigquery.it.utils.BucketClearer' - classpath = sourceSets.integrationTest.runtimeClasspath - args findProperty('kcbq_test_keyfile') ?: '' - args findProperty('kcbq_test_project') ?: '' - args findProperty('kcbq_test_bucket') ?: '' - args findProperty('kcbq_test_keysource') ?: '' - } - - task integrationTest(type: Test) { - testClassesDirs = sourceSets.integrationTest.output.classesDirs - classpath = sourceSets.integrationTest.runtimeClasspath - } - - compileIntegrationTestJava.options.compilerArgs << '-Xlint:unchecked' - - configurations { - integrationTestCompile.extendsFrom testCompile - integrationTestRuntime.extendsFrom testRuntime - } - - javadoc { - options.links 'http://docs.oracle.com/javase/8/docs/api/' - options.links 'http://docs.confluent.io/3.2.0/connect/javadocs/' - options.links 'https://googleapis.dev/java/google-cloud-clients/0.97.0-alpha/' - options.links 'https://kafka.apache.org/0100/javadoc/' - options.links 'https://avro.apache.org/docs/1.8.1/api/java/' - } - - jacocoTestReport { - reports { - html.destination file("${buildDir}/reports/jacoco/") - xml.enabled true - } - } - - dependencies { - compile ( - project(':kcbq-api'), - - "com.google.cloud:google-cloud-bigquery:$googleCloudVersion", - "com.google.cloud:google-cloud-storage:$googleCloudVersion", - "com.google.auth:google-auth-library-oauth2-http:$googleAuthVersion", - "com.google.code.gson:gson:$googleCloudGsonVersion", - "org.slf4j:slf4j-api:$slf4jVersion", - ) - - compile "io.debezium:debezium-core:$debeziumVersion", withoutKafka - - compileOnly ( - "org.apache.kafka:connect-api:$kafkaVersion" - ) - - testCompile ( - "junit:junit:$junitVersion", - "org.mockito:mockito-core:$mockitoVersion", - "org.mockito:mockito-inline:$mockitoVersion", - "org.apache.kafka:connect-api:$kafkaVersion" - ) - } - - artifacts { - archives javadocJar, sourcesJar - } - - uploadArchives { - repositories { - mavenDeployer { - pom.project { - name 'Kafka Connect BigQuery Connector' - packaging 'jar' - description 'A Kafka Connector used to load data into BigQuery' - url 'https://github.com/wepay/kafka-connect-bigquery' - } - } - } - } -} - -project('kcbq-api') { - jar { - manifest { - attributes 'Implementation-Title': 'Kafka Connect BigQuery API', - 'Implementation-Version': version - } - } - - repositories { - mavenCentral() - } - - javadoc { - options.links 'http://docs.oracle.com/javase/8/docs/api/' - options.links 'http://docs.confluent.io/3.2.0/connect/javadocs/' - } - - dependencies { - 
compile "com.google.cloud:google-cloud-bigquery:$googleCloudVersion" - - compileOnly "org.apache.kafka:connect-api:$kafkaVersion" - } - - artifacts { - archives javadocJar, sourcesJar - } - - uploadArchives { - repositories { - mavenDeployer { - pom.project { - name 'Kafka Connect BigQuery Connector API' - packaging 'jar' - description 'A small API for the Kafka Connector used to load data into BigQuery' - url 'https://github.com/wepay/kafka-connect-bigquery' - } - } - } - } -} - -project('kcbq-confluent') { - apply plugin: 'distribution' - - distributions { - main { - baseName = 'kcbq-confluent' - contents { - from configurations.runtime, jar - } - } - } - - jar { - manifest { - attributes 'Implementation-Title': 'Kafka Connect BigQuery Schema Registry Schema Retriever', - 'Implementation-Version': version - } - } - - repositories { - mavenCentral() - maven { - url 'http://packages.confluent.io/maven' - } - jcenter() - } - - javadoc { - options.links 'http://docs.oracle.com/javase/8/docs/api/' - options.links 'http://docs.confluent.io/3.2.0/connect/javadocs/' - } - - dependencies { - - compile ( - project(':kcbq-connector'), - project(':kcbq-api'), - - "org.apache.avro:avro:$avroVersion", - "org.slf4j:slf4j-api:$slf4jVersion", - ) - - compile "io.confluent:kafka-connect-avro-converter:$ioConfluentVersion", withoutKafka - compile "io.confluent:kafka-schema-registry-client:$ioConfluentVersion", withoutKafka - - compileOnly ( - "org.apache.kafka:connect-api:$kafkaVersion", - "org.apache.kafka:kafka-clients:$kafkaVersion" - ) - - testCompile ( - "junit:junit:$junitVersion", - "org.mockito:mockito-core:$mockitoVersion", - "org.mockito:mockito-inline:$mockitoVersion", - "org.apache.kafka:connect-api:$kafkaVersion", - "org.apache.kafka:kafka-clients:$kafkaVersion" - - ) - } - - artifacts { - archives javadocJar, sourcesJar, distTar - } - - uploadArchives { - repositories { - mavenDeployer { - pom.project { - name 'Kafka Connect BigQuery Connector Schema Registry Schema Retriever' - packaging 'jar' - description 'A Schema Registry-based schema retriever for the Kafka Connector used to load data into BigQuery' - url 'https://github.com/wepay/kafka-connect-bigquery' - } - } - } - } -} -// END INDIVIDUAL PROJECTS diff --git a/codecov.yml b/codecov.yml deleted file mode 100644 index c644d5794..000000000 --- a/codecov.yml +++ /dev/null @@ -1,43 +0,0 @@ -codecov: - branch: master - bot: skyzyx - -coverage: - precision: 2 - round: down - range: "70...100" - - status: - project: - default: - target: auto - threshold: 1.25 - branches: - - master - - feature/* - - patch: - default: - target: auto - branches: - - master - - feature/* - - changes: - default: - branches: - - master - - feature/* - - ignore: - - config/.* - - gradle/.* - - test/.* - - .*/vendor/.* - -comment: - layout: "header, diff, changes, sunburst, uncovered, tree" - behavior: default - branches: - - master - - feature/* diff --git a/config/checkstyle/suppressions.xml b/config/checkstyle/suppressions.xml new file mode 100644 index 000000000..f7f6089d5 --- /dev/null +++ b/config/checkstyle/suppressions.xml @@ -0,0 +1,27 @@ + + + + + + \ No newline at end of file diff --git a/config/copyright/custom-header-styles.xml b/config/copyright/custom-header-styles.xml new file mode 100644 index 000000000..4b296d70b --- /dev/null +++ b/config/copyright/custom-header-styles.xml @@ -0,0 +1,44 @@ + + + + + /* + * + */EOL + (\s|\t)*/\*.*$ + .*\*/(\s|\t)*$ + false + true + false + + + /* + * + */ + #!.* + (\s|\t)*/\*.* + .*\*/(\s|\t)*$ + false + true + 
false + + \ No newline at end of file diff --git a/gradle.properties b/gradle.properties deleted file mode 100644 index 7e259101b..000000000 --- a/gradle.properties +++ /dev/null @@ -1,2 +0,0 @@ -group=com.wepay.kcbq -version=1.6.5 diff --git a/gradle/wrapper/gradle-wrapper.jar b/gradle/wrapper/gradle-wrapper.jar deleted file mode 100644 index 94336fcae..000000000 Binary files a/gradle/wrapper/gradle-wrapper.jar and /dev/null differ diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties deleted file mode 100644 index b0acbdcd7..000000000 --- a/gradle/wrapper/gradle-wrapper.properties +++ /dev/null @@ -1,5 +0,0 @@ -distributionBase=GRADLE_USER_HOME -distributionPath=wrapper/dists -distributionUrl=https\://services.gradle.org/distributions/gradle-5.5-bin.zip -zipStoreBase=GRADLE_USER_HOME -zipStorePath=wrapper/dists diff --git a/gradlew b/gradlew deleted file mode 100755 index cccdd3d51..000000000 --- a/gradlew +++ /dev/null @@ -1,172 +0,0 @@ -#!/usr/bin/env sh - -############################################################################## -## -## Gradle start up script for UN*X -## -############################################################################## - -# Attempt to set APP_HOME -# Resolve links: $0 may be a link -PRG="$0" -# Need this for relative symlinks. -while [ -h "$PRG" ] ; do - ls=`ls -ld "$PRG"` - link=`expr "$ls" : '.*-> \(.*\)$'` - if expr "$link" : '/.*' > /dev/null; then - PRG="$link" - else - PRG=`dirname "$PRG"`"/$link" - fi -done -SAVED="`pwd`" -cd "`dirname \"$PRG\"`/" >/dev/null -APP_HOME="`pwd -P`" -cd "$SAVED" >/dev/null - -APP_NAME="Gradle" -APP_BASE_NAME=`basename "$0"` - -# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. -DEFAULT_JVM_OPTS="" - -# Use the maximum available, or set MAX_FD != -1 to use that value. -MAX_FD="maximum" - -warn () { - echo "$*" -} - -die () { - echo - echo "$*" - echo - exit 1 -} - -# OS specific support (must be 'true' or 'false'). -cygwin=false -msys=false -darwin=false -nonstop=false -case "`uname`" in - CYGWIN* ) - cygwin=true - ;; - Darwin* ) - darwin=true - ;; - MINGW* ) - msys=true - ;; - NONSTOP* ) - nonstop=true - ;; -esac - -CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar - -# Determine the Java command to use to start the JVM. -if [ -n "$JAVA_HOME" ] ; then - if [ -x "$JAVA_HOME/jre/sh/java" ] ; then - # IBM's JDK on AIX uses strange locations for the executables - JAVACMD="$JAVA_HOME/jre/sh/java" - else - JAVACMD="$JAVA_HOME/bin/java" - fi - if [ ! -x "$JAVACMD" ] ; then - die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME - -Please set the JAVA_HOME variable in your environment to match the -location of your Java installation." - fi -else - JAVACMD="java" - which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. - -Please set the JAVA_HOME variable in your environment to match the -location of your Java installation." -fi - -# Increase the maximum file descriptors if we can. -if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then - MAX_FD_LIMIT=`ulimit -H -n` - if [ $? -eq 0 ] ; then - if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then - MAX_FD="$MAX_FD_LIMIT" - fi - ulimit -n $MAX_FD - if [ $? 
-ne 0 ] ; then - warn "Could not set maximum file descriptor limit: $MAX_FD" - fi - else - warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT" - fi -fi - -# For Darwin, add options to specify how the application appears in the dock -if $darwin; then - GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\"" -fi - -# For Cygwin, switch paths to Windows format before running java -if $cygwin ; then - APP_HOME=`cygpath --path --mixed "$APP_HOME"` - CLASSPATH=`cygpath --path --mixed "$CLASSPATH"` - JAVACMD=`cygpath --unix "$JAVACMD"` - - # We build the pattern for arguments to be converted via cygpath - ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null` - SEP="" - for dir in $ROOTDIRSRAW ; do - ROOTDIRS="$ROOTDIRS$SEP$dir" - SEP="|" - done - OURCYGPATTERN="(^($ROOTDIRS))" - # Add a user-defined pattern to the cygpath arguments - if [ "$GRADLE_CYGPATTERN" != "" ] ; then - OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)" - fi - # Now convert the arguments - kludge to limit ourselves to /bin/sh - i=0 - for arg in "$@" ; do - CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -` - CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option - - if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition - eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"` - else - eval `echo args$i`="\"$arg\"" - fi - i=$((i+1)) - done - case $i in - (0) set -- ;; - (1) set -- "$args0" ;; - (2) set -- "$args0" "$args1" ;; - (3) set -- "$args0" "$args1" "$args2" ;; - (4) set -- "$args0" "$args1" "$args2" "$args3" ;; - (5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;; - (6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;; - (7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;; - (8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;; - (9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;; - esac -fi - -# Escape application args -save () { - for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done - echo " " -} -APP_ARGS=$(save "$@") - -# Collect all arguments for the java command, following the shell quoting and substitution rules -eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS" - -# by default we should be in the correct project dir, but when run from Finder on Mac, the cwd is wrong -if [ "$(uname)" = "Darwin" ] && [ "$HOME" = "$PWD" ]; then - cd "$(dirname "$0")" -fi - -exec "$JAVACMD" "$@" diff --git a/gradlew.bat b/gradlew.bat deleted file mode 100644 index e95643d6a..000000000 --- a/gradlew.bat +++ /dev/null @@ -1,84 +0,0 @@ -@if "%DEBUG%" == "" @echo off -@rem ########################################################################## -@rem -@rem Gradle startup script for Windows -@rem -@rem ########################################################################## - -@rem Set local scope for the variables with windows NT shell -if "%OS%"=="Windows_NT" setlocal - -set DIRNAME=%~dp0 -if "%DIRNAME%" == "" set DIRNAME=. -set APP_BASE_NAME=%~n0 -set APP_HOME=%DIRNAME% - -@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 
-set DEFAULT_JVM_OPTS= - -@rem Find java.exe -if defined JAVA_HOME goto findJavaFromJavaHome - -set JAVA_EXE=java.exe -%JAVA_EXE% -version >NUL 2>&1 -if "%ERRORLEVEL%" == "0" goto init - -echo. -echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. -echo. -echo Please set the JAVA_HOME variable in your environment to match the -echo location of your Java installation. - -goto fail - -:findJavaFromJavaHome -set JAVA_HOME=%JAVA_HOME:"=% -set JAVA_EXE=%JAVA_HOME%/bin/java.exe - -if exist "%JAVA_EXE%" goto init - -echo. -echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% -echo. -echo Please set the JAVA_HOME variable in your environment to match the -echo location of your Java installation. - -goto fail - -:init -@rem Get command-line arguments, handling Windows variants - -if not "%OS%" == "Windows_NT" goto win9xME_args - -:win9xME_args -@rem Slurp the command line arguments. -set CMD_LINE_ARGS= -set _SKIP=2 - -:win9xME_args_slurp -if "x%~1" == "x" goto execute - -set CMD_LINE_ARGS=%* - -:execute -@rem Setup the command line - -set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar - -@rem Execute Gradle -"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS% - -:end -@rem End local scope for the variables with windows NT shell -if "%ERRORLEVEL%"=="0" goto mainEnd - -:fail -rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of -rem the _cmd.exe /c_ return code! -if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1 -exit /b 1 - -:mainEnd -if "%OS%"=="Windows_NT" endlocal - -:omega diff --git a/kcbq-api/pom.xml b/kcbq-api/pom.xml new file mode 100644 index 000000000..f60fd834a --- /dev/null +++ b/kcbq-api/pom.xml @@ -0,0 +1,63 @@ + + + + 4.0.0 + + + com.wepay.kcbq + kcbq-parent + 1.6.11-SNAPSHOT + .. + + + kcbq-api + kafka-connect-bigquery-api + + + ${project.parent.basedir} + + + + + org.apache.kafka + connect-api + + + + com.google.cloud + google-cloud-bigquery + + + + + + + org.apache.maven.plugins + maven-compiler-plugin + + + org.apache.maven.plugins + maven-checkstyle-plugin + + + + diff --git a/kcbq-api/src/main/java/com/wepay/kafka/connect/bigquery/api/KafkaSchemaRecordType.java b/kcbq-api/src/main/java/com/wepay/kafka/connect/bigquery/api/KafkaSchemaRecordType.java index 8b197c416..b7b1b0c0b 100644 --- a/kcbq-api/src/main/java/com/wepay/kafka/connect/bigquery/api/KafkaSchemaRecordType.java +++ b/kcbq-api/src/main/java/com/wepay/kafka/connect/bigquery/api/KafkaSchemaRecordType.java @@ -1,3 +1,22 @@ +/* + * Copyright 2020 Confluent, Inc. + * + * This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + package com.wepay.kafka.connect.bigquery.api; diff --git a/kcbq-api/src/main/java/com/wepay/kafka/connect/bigquery/api/SchemaRetriever.java b/kcbq-api/src/main/java/com/wepay/kafka/connect/bigquery/api/SchemaRetriever.java index a948d84a7..3c9db6252 100644 --- a/kcbq-api/src/main/java/com/wepay/kafka/connect/bigquery/api/SchemaRetriever.java +++ b/kcbq-api/src/main/java/com/wepay/kafka/connect/bigquery/api/SchemaRetriever.java @@ -1,3 +1,22 @@ +/* + * Copyright 2020 Confluent, Inc. + * + * This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + package com.wepay.kafka.connect.bigquery.api; import com.google.cloud.bigquery.TableId; diff --git a/kcbq-confluent/pom.xml b/kcbq-confluent/pom.xml new file mode 100644 index 000000000..4c18b3dd7 --- /dev/null +++ b/kcbq-confluent/pom.xml @@ -0,0 +1,199 @@ + + + + 4.0.0 + + + com.wepay.kcbq + kcbq-parent + 1.6.11-SNAPSHOT + .. + + + kcbq-confluent + kafka-connect-bigquery-confluent + + + ${project.parent.basedir} + + + + + com.wepay.kcbq + kcbq-api + + + + org.apache.kafka + connect-api + + + + com.google.cloud + google-cloud-bigquery + + + org.slf4j + slf4j-api + + + org.apache.avro + avro + + + io.confluent + kafka-connect-avro-converter + + + io.confluent + kafka-schema-registry-client + + + + junit + junit + + + org.mockito + mockito-core + + + org.slf4j + slf4j-log4j12 + + + + + + + org.jacoco + jacoco-maven-plugin + + + prepare-agent + + prepare-agent + + + + prepare-agent-it + + prepare-agent-integration + + pre-integration-test + + + merge-coverage-reports + verify + + merge + + + + + ${project.basedir} + + /target/jacoco.exec + /target/jacoco-it.exec + + + + ${project.basedir}/target/jacoco-aggregate.exec + + + + check + + check + + + + + BUNDLE + + + INSTRUCTION + COVEREDRATIO + 0.60 + + + BRANCH + COVEREDRATIO + 0.60 + + + COMPLEXITY + COVEREDRATIO + 0.60 + + + LINE + COVEREDRATIO + 0.60 + + + METHOD + COVEREDRATIO + 0.60 + + + CLASS + COVEREDRATIO + 0.80 + + + + + ${project.basedir}/target/jacoco-aggregate.exec + + + + report + test + + report + + + ${project.basedir}/target/jacoco-aggregate.exec + + + + + + org.apache.maven.plugins + maven-compiler-plugin + + + org.apache.maven.plugins + maven-surefire-plugin + + + org.jacoco + jacoco-maven-plugin + + + org.apache.maven.plugins + maven-checkstyle-plugin + + + + diff --git a/kcbq-confluent/src/main/java/com/wepay/kafka/connect/bigquery/schemaregistry/schemaretriever/SchemaRegistrySchemaRetriever.java b/kcbq-confluent/src/main/java/com/wepay/kafka/connect/bigquery/schemaregistry/schemaretriever/SchemaRegistrySchemaRetriever.java index c6de0fe7a..e41cd41da 100644 --- a/kcbq-confluent/src/main/java/com/wepay/kafka/connect/bigquery/schemaregistry/schemaretriever/SchemaRegistrySchemaRetriever.java +++ b/kcbq-confluent/src/main/java/com/wepay/kafka/connect/bigquery/schemaregistry/schemaretriever/SchemaRegistrySchemaRetriever.java @@ -1,3 +1,22 @@ +/* + * Copyright 2020 
Confluent, Inc. + * + * This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + package com.wepay.kafka.connect.bigquery.schemaregistry.schemaretriever; import com.google.cloud.bigquery.TableId; diff --git a/kcbq-confluent/src/main/java/com/wepay/kafka/connect/bigquery/schemaregistry/schemaretriever/SchemaRegistrySchemaRetrieverConfig.java b/kcbq-confluent/src/main/java/com/wepay/kafka/connect/bigquery/schemaregistry/schemaretriever/SchemaRegistrySchemaRetrieverConfig.java index 4251d0d7f..ab22fbd17 100644 --- a/kcbq-confluent/src/main/java/com/wepay/kafka/connect/bigquery/schemaregistry/schemaretriever/SchemaRegistrySchemaRetrieverConfig.java +++ b/kcbq-confluent/src/main/java/com/wepay/kafka/connect/bigquery/schemaregistry/schemaretriever/SchemaRegistrySchemaRetrieverConfig.java @@ -1,3 +1,22 @@ +/* + * Copyright 2020 Confluent, Inc. + * + * This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + package com.wepay.kafka.connect.bigquery.schemaregistry.schemaretriever; import org.apache.kafka.common.config.AbstractConfig; diff --git a/kcbq-confluent/src/test/java/com/wepay/kafka/connect/bigquery/schemaregistry/schemaretriever/SchemaRegistrySchemaRetrieverConfigTest.java b/kcbq-confluent/src/test/java/com/wepay/kafka/connect/bigquery/schemaregistry/schemaretriever/SchemaRegistrySchemaRetrieverConfigTest.java index 11be78f44..75fcd9b1f 100644 --- a/kcbq-confluent/src/test/java/com/wepay/kafka/connect/bigquery/schemaregistry/schemaretriever/SchemaRegistrySchemaRetrieverConfigTest.java +++ b/kcbq-confluent/src/test/java/com/wepay/kafka/connect/bigquery/schemaregistry/schemaretriever/SchemaRegistrySchemaRetrieverConfigTest.java @@ -1,3 +1,22 @@ +/* + * Copyright 2020 Confluent, Inc. + * + * This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + package com.wepay.kafka.connect.bigquery.schemaregistry.schemaretriever; import io.confluent.kafka.schemaregistry.client.SchemaRegistryClientConfig; diff --git a/kcbq-confluent/src/test/java/com/wepay/kafka/connect/bigquery/schemaregistry/schemaretriever/SchemaRegistrySchemaRetrieverTest.java b/kcbq-confluent/src/test/java/com/wepay/kafka/connect/bigquery/schemaregistry/schemaretriever/SchemaRegistrySchemaRetrieverTest.java index c45a13183..5c9ce1d69 100644 --- a/kcbq-confluent/src/test/java/com/wepay/kafka/connect/bigquery/schemaregistry/schemaretriever/SchemaRegistrySchemaRetrieverTest.java +++ b/kcbq-confluent/src/test/java/com/wepay/kafka/connect/bigquery/schemaregistry/schemaretriever/SchemaRegistrySchemaRetrieverTest.java @@ -1,3 +1,22 @@ +/* + * Copyright 2020 Confluent, Inc. + * + * This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + package com.wepay.kafka.connect.bigquery.schemaregistry.schemaretriever; import static org.junit.Assert.assertEquals; diff --git a/kcbq-confluent/src/test/resources/log4j.properties b/kcbq-confluent/src/test/resources/log4j.properties new file mode 100644 index 000000000..94fb72b55 --- /dev/null +++ b/kcbq-confluent/src/test/resources/log4j.properties @@ -0,0 +1,33 @@ +# +# Copyright 2020 Confluent, Inc. +# +# This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +log4j.rootLogger=INFO, stdout + +# Send the logs to the console. 
+# +log4j.appender.stdout=org.apache.log4j.ConsoleAppender +log4j.appender.stdout.layout=org.apache.log4j.PatternLayout + +connect.log.pattern=[%d] %p %X{connector.context}%m (%c:%L)%n +log4j.appender.stdout.layout.ConversionPattern=${connect.log.pattern} +log4j.appender.connectAppender.layout.ConversionPattern=${connect.log.pattern} + +# These are used in the log4j properties file that ships by default with Connect +log4j.logger.org.apache.zookeeper=ERROR +log4j.logger.org.reflections=ERROR diff --git a/kcbq-connector/logos/BigQuery.png b/kcbq-connector/logos/BigQuery.png new file mode 100644 index 000000000..a7e0a7156 Binary files /dev/null and b/kcbq-connector/logos/BigQuery.png differ diff --git a/kcbq-connector/logos/confluent.png b/kcbq-connector/logos/confluent.png new file mode 100644 index 000000000..14cd8c506 Binary files /dev/null and b/kcbq-connector/logos/confluent.png differ diff --git a/kcbq-connector/pom.xml b/kcbq-connector/pom.xml new file mode 100644 index 000000000..0428b78b6 --- /dev/null +++ b/kcbq-connector/pom.xml @@ -0,0 +1,179 @@ + + + + 4.0.0 + + + com.wepay.kcbq + kcbq-parent + 1.6.11-SNAPSHOT + .. + + + kcbq-connector + kafka-connect-bigquery + + + ${project.parent.basedir} + + + + + org.apache.kafka + connect-api + + + + com.fasterxml.jackson.core + jackson-core + + + com.google.errorprone + error_prone_annotations + + + com.google.code.gson + gson + + + com.google.cloud + google-cloud-bigquery + + + com.google.cloud + google-cloud-storage + + + com.google.auth + google-auth-library-oauth2-http + + + org.slf4j + slf4j-api + + + io.debezium + debezium-core + + + + com.wepay.kcbq + kcbq-api + + + com.wepay.kcbq + kcbq-confluent + + + + junit + junit + + + org.mockito + mockito-core + + + org.slf4j + slf4j-log4j12 + + + + + + + org.apache.maven.plugins + maven-compiler-plugin + + + org.apache.maven.plugins + maven-surefire-plugin + + + org.jacoco + jacoco-maven-plugin + + + org.apache.maven.plugins + maven-checkstyle-plugin + + + io.confluent + kafka-connect-maven-plugin + + + + kafka-connect + + + BigQuery Sink Connector + kafka-connect-bigquery + + A sink connector for writing to Google BigQuery, with support for automatic table creation and schema evolution. + + logos/BigQuery.png + https://docs.confluent.io/kafka-connect-bigquery/current/index.html + https://github.com/confluentinc/kafka-connect-bigquery + + Confluent, Inc. + supports WePay's BigQuery connector version 1.1.2 and later, as part of a Confluent Platform subscription. + ]]> + https://docs.confluent.io/kafka-connect-bigquery/current/index.html + logos/confluent.png + + wepay + organization + WePay + https://go.wepay.com/ + + true + + + sink + + + + cloud + analytics + data + gcp + google + bigquery + warehouse + platform + nosql + + + + Apache Kafka 0.11 or higher / Confluent Platform 3.3 or higher + Java 1.8 or higher + Active Google Cloud Platform (GCP) account with authorization to create resources + Kafka Connect 0.11 or higher / Confluent Platform 3.3 or higher + + + + + + + + diff --git a/kcbq-connector/quickstart/avro-console-producer.sh b/kcbq-connector/quickstart/avro-console-producer.sh index a7fe02118..9065f0cb3 100755 --- a/kcbq-connector/quickstart/avro-console-producer.sh +++ b/kcbq-connector/quickstart/avro-console-producer.sh @@ -1,5 +1,8 @@ #! /usr/bin/env bash -# Copyright 2016 WePay, Inc. +# +# Copyright 2020 Confluent, Inc. +# +# This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,6 +16,7 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. +# BASE_DIR=`dirname "$0"` diff --git a/kcbq-connector/quickstart/connector.sh b/kcbq-connector/quickstart/connector.sh index 5c9dcecd9..123e9bbe9 100755 --- a/kcbq-connector/quickstart/connector.sh +++ b/kcbq-connector/quickstart/connector.sh @@ -1,5 +1,8 @@ #! /usr/bin/env bash -# Copyright 2016 WePay, Inc. +# +# Copyright 2020 Confluent, Inc. +# +# This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,6 +16,7 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. +# BASE_DIR="$(cd "$(dirname "$0")" && pwd)" diff --git a/kcbq-connector/quickstart/kafka.sh b/kcbq-connector/quickstart/kafka.sh index 953c0d3f8..2ce3391ab 100755 --- a/kcbq-connector/quickstart/kafka.sh +++ b/kcbq-connector/quickstart/kafka.sh @@ -1,5 +1,8 @@ #! /usr/bin/env bash -# Copyright 2016 WePay, Inc. +# +# Copyright 2020 Confluent, Inc. +# +# This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,6 +16,7 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. +# BASE_DIR=`dirname "$0"` diff --git a/kcbq-connector/quickstart/properties/connector.properties b/kcbq-connector/quickstart/properties/connector.properties index 5eb5fc3b3..7e6284215 100644 --- a/kcbq-connector/quickstart/properties/connector.properties +++ b/kcbq-connector/quickstart/properties/connector.properties @@ -1,4 +1,7 @@ -# Copyright 2016 WePay, Inc. +# +# Copyright 2020 Confluent, Inc. +# +# This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,6 +15,7 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. +# name=bigquery-connector connector.class=com.wepay.kafka.connect.bigquery.BigQuerySinkConnector @@ -25,10 +29,6 @@ autoUpdateSchemas=true schemaRetriever=com.wepay.kafka.connect.bigquery.schemaregistry.schemaretriever.SchemaRegistrySchemaRetriever schemaRegistryLocation=http://localhost:8081 -bufferSize=100000 -maxWriteSize=10000 -tableWriteWait=1000 - ########################################### Fill me in! ########################################### # The name of the BigQuery project to write to project= diff --git a/kcbq-connector/quickstart/properties/standalone.properties b/kcbq-connector/quickstart/properties/standalone.properties index 2aee81055..1450e07cc 100644 --- a/kcbq-connector/quickstart/properties/standalone.properties +++ b/kcbq-connector/quickstart/properties/standalone.properties @@ -1,4 +1,7 @@ -# Copyright 2016 WePay, Inc. +# +# Copyright 2020 Confluent, Inc. 
+# +# This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,6 +15,7 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. +# bootstrap.servers=localhost:9092 key.converter=io.confluent.connect.avro.AvroConverter diff --git a/kcbq-connector/quickstart/schema-registry.sh b/kcbq-connector/quickstart/schema-registry.sh index 5b5dfd3a6..61735fabc 100755 --- a/kcbq-connector/quickstart/schema-registry.sh +++ b/kcbq-connector/quickstart/schema-registry.sh @@ -1,5 +1,8 @@ #! /usr/bin/env bash -# Copyright 2016 WePay, Inc. +# +# Copyright 2020 Confluent, Inc. +# +# This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,6 +16,7 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. +# BASE_DIR=`dirname "$0"` diff --git a/kcbq-connector/quickstart/zookeeper.sh b/kcbq-connector/quickstart/zookeeper.sh index ad5a88205..3e5fcbdcc 100755 --- a/kcbq-connector/quickstart/zookeeper.sh +++ b/kcbq-connector/quickstart/zookeeper.sh @@ -1,5 +1,8 @@ #! /usr/bin/env bash -# Copyright 2016 WePay, Inc. +# +# Copyright 2020 Confluent, Inc. +# +# This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,6 +16,7 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. +# BASE_DIR=`dirname "$0"` diff --git a/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/BigQueryHelper.java b/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/BigQueryHelper.java deleted file mode 100644 index 1ad008e4b..000000000 --- a/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/BigQueryHelper.java +++ /dev/null @@ -1,106 +0,0 @@ -package com.wepay.kafka.connect.bigquery; - -/* - * Copyright 2016 WePay, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -import com.google.auth.oauth2.GoogleCredentials; -import com.google.cloud.bigquery.BigQuery; -import com.google.cloud.bigquery.BigQueryOptions; - -import com.wepay.kafka.connect.bigquery.exception.BigQueryConnectException; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.ByteArrayInputStream; -import java.nio.charset.StandardCharsets; - -/** - * Convenience class for creating a default {@link com.google.cloud.bigquery.BigQuery} instance, - * with or without login credentials. - */ -public class BigQueryHelper { - private static final Logger logger = LoggerFactory.getLogger(BigQueryHelper.class); - private static String keySource; - - /** - * Returns a default {@link BigQuery} instance for the specified project with credentials provided - * in the specified file, which can then be used for creating, updating, and inserting into tables - * from specific datasets. - * - * @param projectName The name of the BigQuery project to work with - * @param key The google credentials JSON key that can be used to provide - * credentials to BigQuery, or null if no authentication should be performed. - * @return The resulting BigQuery object. - */ - public BigQuery connect(String projectName, String key) { - if (key == null) { - return connect(projectName); - } - logger.debug("Attempting to open file {} for service account json key", key); - InputStream credentialsStream; - try { - if (keySource != null && keySource.equals("JSON")) { - credentialsStream = new ByteArrayInputStream(key.getBytes(StandardCharsets.UTF_8)); - } else { - credentialsStream = new FileInputStream(key); - } - return new - BigQueryOptions.DefaultBigQueryFactory().create( - BigQueryOptions.newBuilder() - .setProjectId(projectName) - .setCredentials(GoogleCredentials.fromStream(credentialsStream)) - .build() - ); - } catch (IOException err) { - throw new BigQueryConnectException("Failed to access json key file", err); - } - } - /** - * Returns a default {@link BigQuery} instance for the specified project with credentials provided - * in the specified file, which can then be used for creating, updating, and inserting into tables - * from specific datasets. - * - * @param keySource The type of key config we can expect. This is either a String - * representation of the Google credentials file, or the path to the Google credentials file. - * @return The resulting BigQuery object. - */ - public BigQueryHelper setKeySource(String keySource) { - this.keySource = keySource; - return this; - } - - /** - * Returns a default {@link BigQuery} instance for the specified project with no authentication - * credentials, which can then be used for creating, updating, and inserting into tables from - * specific datasets. - * - * @param projectName The name of the BigQuery project to work with - * @return The resulting BigQuery object. 
- */ - public BigQuery connect(String projectName) { - logger.debug("Attempting to access BigQuery without authentication"); - return new BigQueryOptions.DefaultBigQueryFactory().create( - BigQueryOptions.newBuilder() - .setProjectId(projectName) - .build() - ); - } -} diff --git a/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/BigQuerySinkConnector.java b/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/BigQuerySinkConnector.java index 99a22b807..05c912e07 100644 --- a/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/BigQuerySinkConnector.java +++ b/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/BigQuerySinkConnector.java @@ -1,7 +1,7 @@ -package com.wepay.kafka.connect.bigquery; - /* - * Copyright 2016 WePay, Inc. + * Copyright 2020 Confluent, Inc. + * + * This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,25 +17,18 @@ * under the License. */ - -import com.google.cloud.bigquery.BigQuery; -import com.google.cloud.bigquery.TableId; - -import com.wepay.kafka.connect.bigquery.api.SchemaRetriever; +package com.wepay.kafka.connect.bigquery; import com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig; -import com.wepay.kafka.connect.bigquery.convert.SchemaConverter; +import com.wepay.kafka.connect.bigquery.config.BigQuerySinkTaskConfig; -import com.wepay.kafka.connect.bigquery.exception.BigQueryConnectException; -import com.wepay.kafka.connect.bigquery.exception.SinkConfigConnectException; - -import com.wepay.kafka.connect.bigquery.utils.TopicToTableResolver; import com.wepay.kafka.connect.bigquery.utils.Version; +import org.apache.kafka.common.config.Config; import org.apache.kafka.common.config.ConfigDef; -import org.apache.kafka.common.config.ConfigException; +import org.apache.kafka.common.config.ConfigValue; import org.apache.kafka.connect.connector.Task; import org.apache.kafka.connect.sink.SinkConnector; @@ -46,85 +39,43 @@ import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.Optional; /** * A {@link SinkConnector} used to delegate BigQuery data writes to * {@link org.apache.kafka.connect.sink.SinkTask SinkTasks}. 
 */
 public class BigQuerySinkConnector extends SinkConnector {
-  private final BigQuery testBigQuery;
-  private final SchemaManager testSchemaManager;
-
-  public static final String GCS_BQ_TASK_CONFIG_KEY = "GCSBQTask";
-
-  public BigQuerySinkConnector() {
-    testBigQuery = null;
-    testSchemaManager = null;
-  }
-
-  // For testing purposes only; will never be called by the Kafka Connect framework
-  BigQuerySinkConnector(BigQuery bigQuery) {
-    this.testBigQuery = bigQuery;
-    this.testSchemaManager = null;
-  }
-
-  // For testing purposes only; will never be called by the Kafka Connect framework
-  BigQuerySinkConnector(BigQuery bigQuery, SchemaManager schemaManager) {
-    this.testBigQuery = bigQuery;
-    this.testSchemaManager = schemaManager;
-  }
-  private BigQuerySinkConfig config;
-  private Map<String, String> configProperties;
+  BigQuerySinkConfig config;
+  Map<String, String> configProperties;
 
   private static final Logger logger = LoggerFactory.getLogger(BigQuerySinkConnector.class);
 
   @Override
   public ConfigDef config() {
     logger.trace("connector.config()");
-    return config.getConfig();
-  }
-
-  private BigQuery getBigQuery() {
-    if (testBigQuery != null) {
-      return testBigQuery;
-    }
-    String projectName = config.getString(config.PROJECT_CONFIG);
-    String key = config.getKeyFile();
-    String keySource = config.getString(config.KEY_SOURCE_CONFIG);
-    return new BigQueryHelper().setKeySource(keySource).connect(projectName, key);
+    return BigQuerySinkConfig.getConfig();
   }
 
-  private void ensureExistingTables() {
-    BigQuery bigQuery = getBigQuery();
-    Map<String, TableId> topicsToTableIds = TopicToTableResolver.getTopicsToTables(config);
-    for (TableId tableId : topicsToTableIds.values()) {
-      if (bigQuery.getTable(tableId) == null) {
-        logger.warn(
-            "You may want to enable auto table creation by setting {}=true in the properties file",
-            config.TABLE_CREATE_CONFIG);
-        throw new BigQueryConnectException("Table '" + tableId + "' does not exist");
-      }
+  @Override
+  public Config validate(Map<String, String> properties) {
+    List<ConfigValue> singlePropertyValidations = config().validate(properties);
+    // If any of our properties had malformed syntax or failed a validation to ensure, e.g., that it fell within an
+    // acceptable numeric range, we only report those errors since they prevent us from being able to construct a
+    // valid BigQuerySinkConfig instance
+    if (singlePropertyValidations.stream().anyMatch(v -> !v.errorMessages().isEmpty())) {
+      return new Config(singlePropertyValidations);
     }
+    return new BigQuerySinkConfig(properties).validate();
   }
 
   @Override
   public void start(Map<String, String> properties) {
     logger.trace("connector.start()");
-    try {
-      configProperties = properties;
-      config = new BigQuerySinkConfig(properties);
-    } catch (ConfigException err) {
-      throw new SinkConfigConnectException(
-          "Couldn't start BigQuerySinkConnector due to configuration error",
-          err
-      );
-    }
-
-    if (!config.getBoolean(config.TABLE_CREATE_CONFIG)) {
-      ensureExistingTables();
-    }
+    configProperties = properties;
+    config = new BigQuerySinkConfig(properties);
+    // Revalidate here in case the connector has been upgraded and its old config is no longer valid
+    config.ensureValid();
   }
 
   @Override
@@ -147,7 +98,7 @@ public List<Map<String, String>> taskConfigs(int maxTasks) {
       HashMap<String, String> taskConfig = new HashMap<>(configProperties);
       if (i == 0 && !config.getList(BigQuerySinkConfig.ENABLE_BATCH_CONFIG).isEmpty()) {
         // if batch loading is enabled, configure first task to do the GCS -> BQ loading
-        taskConfig.put(GCS_BQ_TASK_CONFIG_KEY, "true");
+        taskConfig.put(BigQuerySinkTaskConfig.GCS_BQ_TASK_CONFIG, "true");
       }
       taskConfigs.add(taskConfig);
     }
diff --git a/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/BigQuerySinkTask.java b/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/BigQuerySinkTask.java
index bf4b43d6d..452024165 100644
--- a/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/BigQuerySinkTask.java
+++ b/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/BigQuerySinkTask.java
@@ -1,7 +1,7 @@
-package com.wepay.kafka.connect.bigquery;
-
 /*
- * Copyright 2016 WePay, Inc.
+ * Copyright 2020 Confluent, Inc.
+ *
+ * This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -17,6 +17,7 @@
  * under the License.
  */
+package com.wepay.kafka.connect.bigquery;
 
 import com.google.cloud.bigquery.BigQuery;
 import com.google.cloud.bigquery.InsertAllRequest.RowToInsert;
@@ -32,7 +33,6 @@
 import com.wepay.kafka.connect.bigquery.convert.KafkaDataBuilder;
 import com.wepay.kafka.connect.bigquery.convert.RecordConverter;
 import com.wepay.kafka.connect.bigquery.convert.SchemaConverter;
-import com.wepay.kafka.connect.bigquery.exception.SinkConfigConnectException;
 import com.wepay.kafka.connect.bigquery.utils.FieldNameSanitizer;
 import com.wepay.kafka.connect.bigquery.utils.PartitionedTableId;
 import com.wepay.kafka.connect.bigquery.utils.TopicToTableResolver;
@@ -47,7 +47,6 @@
 import com.wepay.kafka.connect.bigquery.write.row.SimpleBigQueryWriter;
 import org.apache.kafka.clients.consumer.OffsetAndMetadata;
 import org.apache.kafka.common.TopicPartition;
-import org.apache.kafka.common.config.ConfigException;
 import org.apache.kafka.common.record.TimestampType;
 import org.apache.kafka.connect.errors.ConnectException;
 import org.apache.kafka.connect.sink.SinkRecord;
@@ -170,7 +169,7 @@ private RowToInsert getRecordRow(SinkRecord record) {
     if (kafkaDataFieldName.isPresent()) {
       convertedRecord.put(kafkaDataFieldName.get(), KafkaDataBuilder.buildKafkaDataRecord(record));
     }
-    if (config.getBoolean(config.SANITIZE_FIELD_NAME_CONFIG)) {
+    if (config.getBoolean(BigQuerySinkConfig.SANITIZE_FIELD_NAME_CONFIG)) {
       convertedRecord = FieldNameSanitizer.replaceInvalidKeys(convertedRecord);
     }
     return RowToInsert.of(getRowId(record), convertedRecord);
@@ -185,7 +184,10 @@ private String getRowId(SinkRecord record) {
 
   @Override
   public void put(Collection<SinkRecord> records) {
-    logger.info("Putting {} records in the sink.", records.size());
+    // Periodically poll for errors here instead of doing a stop-the-world check in flush()
+    executor.maybeThrowEncounteredError();
+
+    logger.debug("Putting {} records in the sink.", records.size());
 
     // create tableWriters
     Map<PartitionedTableId, TableWriterBuilder> tableWriterBuilders = new HashMap<>();
@@ -201,17 +203,17 @@
       if (!tableWriterBuilders.containsKey(table)) {
         TableWriterBuilder tableWriterBuilder;
-        if (config.getList(config.ENABLE_BATCH_CONFIG).contains(record.topic())) {
+        if (config.getList(BigQuerySinkConfig.ENABLE_BATCH_CONFIG).contains(record.topic())) {
           String topic = record.topic();
           String gcsBlobName = topic + "_" + uuid + "_" + Instant.now().toEpochMilli();
-          String gcsFolderName = config.getString(config.GCS_FOLDER_NAME_CONFIG);
+          String gcsFolderName = config.getString(BigQuerySinkConfig.GCS_FOLDER_NAME_CONFIG);
           if (gcsFolderName != null && !"".equals(gcsFolderName)) {
             gcsBlobName = gcsFolderName + "/" + gcsBlobName;
           }
           tableWriterBuilder = new
               GCSBatchTableWriter.Builder(
                   gcsToBQWriter,
                   table.getBaseTableId(),
-                  config.getString(config.GCS_BUCKET_NAME_CONFIG),
+                  config.getString(BigQuerySinkConfig.GCS_BUCKET_NAME_CONFIG),
                   gcsBlobName,
                   topic,
                   recordConverter);
@@ -251,10 +253,9 @@ private BigQuery getBigQuery() {
     if (testBigQuery != null) {
       return testBigQuery;
     }
-    String projectName = config.getString(config.PROJECT_CONFIG);
-    String keyFile = config.getKeyFile();
-    String keySource = config.getString(config.KEY_SOURCE_CONFIG);
-    return new BigQueryHelper().setKeySource(keySource).connect(projectName, keyFile);
+    return new GcpClientBuilder.BigQueryBuilder()
+        .withConfig(config)
+        .build();
   }
 
   private SchemaManager getSchemaManager(BigQuery bigQuery) {
@@ -267,16 +268,16 @@ private SchemaManager getSchemaManager(BigQuery bigQuery) {
     Optional<String> kafkaKeyFieldName = config.getKafkaKeyFieldName();
     Optional<String> kafkaDataFieldName = config.getKafkaDataFieldName();
     Optional<String> timestampPartitionFieldName = config.getTimestampPartitionFieldName();
-    Optional<List<String>> clusteringFieldName = config.getClusteringPartitionFieldName();
+    Optional<List<String>> clusteringFieldName = config.getClusteringPartitionFieldNames();
     return new SchemaManager(schemaRetriever, schemaConverter, bigQuery, kafkaKeyFieldName,
         kafkaDataFieldName, timestampPartitionFieldName, clusteringFieldName);
   }
 
   private BigQueryWriter getBigQueryWriter() {
-    boolean autoUpdateSchemas = config.getBoolean(config.SCHEMA_UPDATE_CONFIG);
-    boolean autoCreateTables = config.getBoolean(config.TABLE_CREATE_CONFIG);
-    int retry = config.getInt(config.BIGQUERY_RETRY_CONFIG);
-    long retryWait = config.getLong(config.BIGQUERY_RETRY_WAIT_CONFIG);
+    boolean autoUpdateSchemas = config.getBoolean(BigQuerySinkConfig.SCHEMA_UPDATE_CONFIG);
+    boolean autoCreateTables = config.getBoolean(BigQuerySinkConfig.TABLE_CREATE_CONFIG);
+    int retry = config.getInt(BigQuerySinkConfig.BIGQUERY_RETRY_CONFIG);
+    long retryWait = config.getLong(BigQuerySinkConfig.BIGQUERY_RETRY_WAIT_CONFIG);
     BigQuery bigQuery = getBigQuery();
     if (autoUpdateSchemas || autoCreateTables) {
       return new AdaptiveBigQueryWriter(bigQuery,
@@ -294,18 +295,16 @@ private Storage getGcs() {
     if (testGcs != null) {
       return testGcs;
     }
-    String projectName = config.getString(config.PROJECT_CONFIG);
-    String key = config.getKeyFile();
-    String keySource = config.getString(config.KEY_SOURCE_CONFIG);
-    return new GCSBuilder(projectName).setKey(key).setKeySource(keySource).build();
-
+    return new GcpClientBuilder.GcsBuilder()
+        .withConfig(config)
+        .build();
   }
 
   private GCSToBQWriter getGcsWriter() {
     BigQuery bigQuery = getBigQuery();
-    int retry = config.getInt(config.BIGQUERY_RETRY_CONFIG);
-    long retryWait = config.getLong(config.BIGQUERY_RETRY_WAIT_CONFIG);
-    boolean autoCreateTables = config.getBoolean(config.TABLE_CREATE_CONFIG);
+    int retry = config.getInt(BigQuerySinkConfig.BIGQUERY_RETRY_CONFIG);
+    long retryWait = config.getLong(BigQuerySinkConfig.BIGQUERY_RETRY_WAIT_CONFIG);
+    boolean autoCreateTables = config.getBoolean(BigQuerySinkConfig.TABLE_CREATE_CONFIG);
     // schemaManager shall only be needed for creating table hence do not fetch instance if not
     // needed.
     SchemaManager schemaManager = autoCreateTables ?
getSchemaManager(bigQuery) : null; @@ -320,16 +319,7 @@ private GCSToBQWriter getGcsWriter() { @Override public void start(Map properties) { logger.trace("task.start()"); - final boolean hasGCSBQTask = - properties.remove(BigQuerySinkConnector.GCS_BQ_TASK_CONFIG_KEY) != null; - try { - config = new BigQuerySinkTaskConfig(properties); - } catch (ConfigException err) { - throw new SinkConfigConnectException( - "Couldn't start BigQuerySinkTask due to configuration error", - err - ); - } + config = new BigQuerySinkTaskConfig(properties); bigQueryWriter = getBigQueryWriter(); gcsToBQWriter = getGcsWriter(); @@ -338,10 +328,10 @@ public void start(Map properties) { executor = new KCBQThreadPoolExecutor(config, new LinkedBlockingQueue<>()); topicPartitionManager = new TopicPartitionManager(); useMessageTimeDatePartitioning = - config.getBoolean(config.BIGQUERY_MESSAGE_TIME_PARTITIONING_CONFIG); + config.getBoolean(BigQuerySinkConfig.BIGQUERY_MESSAGE_TIME_PARTITIONING_CONFIG); usePartitionDecorator = - config.getBoolean(config.BIGQUERY_PARTITION_DECORATOR_CONFIG); - if (hasGCSBQTask) { + config.getBoolean(BigQuerySinkConfig.BIGQUERY_PARTITION_DECORATOR_CONFIG); + if (config.getBoolean(BigQuerySinkTaskConfig.GCS_BQ_TASK_CONFIG)) { startGCSToBQLoadTask(); } } @@ -349,7 +339,7 @@ public void start(Map properties) { private void startGCSToBQLoadTask() { logger.info("Attempting to start GCS Load Executor."); gcsLoadExecutor = Executors.newScheduledThreadPool(1); - String bucketName = config.getString(config.GCS_BUCKET_NAME_CONFIG); + String bucketName = config.getString(BigQuerySinkConfig.GCS_BUCKET_NAME_CONFIG); Storage gcs = getGcs(); // get the bucket, or create it if it does not exist. Bucket bucket = gcs.get(bucketName); diff --git a/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/GCSBuilder.java b/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/GCSBuilder.java deleted file mode 100644 index e2dd3ed37..000000000 --- a/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/GCSBuilder.java +++ /dev/null @@ -1,107 +0,0 @@ -package com.wepay.kafka.connect.bigquery; - -/* - * Copyright 2016 WePay, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
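For context on the start() change above: with the try/catch removed, a malformed configuration now surfaces as the underlying ConfigException thrown by the BigQuerySinkTaskConfig constructor rather than the deleted SinkConfigConnectException wrapper. A minimal illustrative sketch of the resulting failure path (caller names assumed):

```java
// Hypothetical caller-side view of task startup after this change
try {
  task.start(properties);
} catch (org.apache.kafka.common.config.ConfigException e) {
  // The framework now sees the raw ConfigException instead of a wrapped variant
  System.err.println("Invalid connector configuration: " + e.getMessage());
}
```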
- */ - -import com.google.auth.oauth2.GoogleCredentials; -import com.google.cloud.storage.Storage; -import com.google.cloud.storage.StorageOptions; - -import com.wepay.kafka.connect.bigquery.exception.GCSConnectException; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.ByteArrayInputStream; -import java.nio.charset.StandardCharsets; - -/** - * Convenience class for creating a {@link com.google.cloud.storage.Storage} instance - */ -public class GCSBuilder { - private static final Logger logger = LoggerFactory.getLogger(GCSBuilder.class); - - private final String projectName; - private String key; - private String keySource; - - public GCSBuilder(String projectName) { - this.projectName = projectName; - this.key = null; - } - - public GCSBuilder setKeySource(String keySourceType) { - this.keySource = keySourceType; - return this; - } - - public GCSBuilder setKey(String keyFile) { - this.key = keyFile; - return this; - } - public Storage build() { - return connect(projectName, key); - } - - /** - * Returns a default {@link Storage} instance for the specified project with credentials provided - * in the specified file. - * - * @param projectName The name of the GCS project to work with - * @param key The name of a file containing a JSON key that can be used to provide - * credentials to GCS, or null if no authentication should be performed. - * @return The resulting Storage object. - */ - private Storage connect(String projectName, String key) { - if (key == null) { - return connect(projectName); - } - try { - InputStream credentialsStream; - if (keySource != null && keySource.equals("JSON")) { - credentialsStream = new ByteArrayInputStream(key.getBytes(StandardCharsets.UTF_8)); - } else { - credentialsStream = new FileInputStream(key); - } - return StorageOptions.newBuilder() - .setProjectId(projectName) - .setCredentials(GoogleCredentials.fromStream(credentialsStream)) - .build() - .getService(); - } catch (IOException err) { - throw new GCSConnectException("Failed to access json key file", err); - } - } - - /** - * Returns a default {@link Storage} instance for the specified project with no authentication - * credentials. - * - * @param projectName The name of the GCS project to work with - * @return The resulting Storage object. - */ - private Storage connect(String projectName) { - logger.debug("Attempting to access BigQuery without authentication"); - return StorageOptions.newBuilder() - .setProjectId(projectName) - .build() - .getService(); - } -} - diff --git a/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/GCSToBQLoadRunnable.java b/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/GCSToBQLoadRunnable.java index 348fccb29..aa42c8e6c 100644 --- a/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/GCSToBQLoadRunnable.java +++ b/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/GCSToBQLoadRunnable.java @@ -1,7 +1,7 @@ -package com.wepay.kafka.connect.bigquery; - /* - * Copyright 2016 WePay, Inc. + * Copyright 2020 Confluent, Inc. + * + * This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,6 +17,8 @@ * under the License. 
*/ +package com.wepay.kafka.connect.bigquery; + import com.google.api.gax.paging.Page; import com.google.cloud.bigquery.BigQuery; import com.google.cloud.bigquery.BigQueryException; diff --git a/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/GcpClientBuilder.java b/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/GcpClientBuilder.java new file mode 100644 index 000000000..5c79fec87 --- /dev/null +++ b/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/GcpClientBuilder.java @@ -0,0 +1,145 @@ +/* + * Copyright 2020 Confluent, Inc. + * + * This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package com.wepay.kafka.connect.bigquery; + +import com.google.auth.oauth2.GoogleCredentials; +import com.google.cloud.bigquery.BigQuery; +import com.google.cloud.bigquery.BigQueryOptions; +import com.google.cloud.storage.Storage; +import com.google.cloud.storage.StorageOptions; +import com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig; +import com.wepay.kafka.connect.bigquery.exception.BigQueryConnectException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.ByteArrayInputStream; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.nio.charset.StandardCharsets; +import java.util.Objects; + +import static com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig.PROJECT_CONFIG; + +public abstract class GcpClientBuilder { + + public enum KeySource { + FILE, JSON + } + + private static final Logger logger = LoggerFactory.getLogger(GcpClientBuilder.class); + + private String project = null; + private KeySource keySource = null; + private String key = null; + + public GcpClientBuilder withConfig(BigQuerySinkConfig config) { + return withProject(config.getString(PROJECT_CONFIG)) + .withKeySource(config.getKeySource()) + .withKey(config.getKey()); + } + + public GcpClientBuilder withProject(String project) { + Objects.requireNonNull(project, "Project cannot be null"); + this.project = project; + return this; + } + + public GcpClientBuilder withKeySource(KeySource keySource) { + Objects.requireNonNull(keySource, "Key cannot be null"); + this.keySource = keySource; + return this; + } + + public GcpClientBuilder withKey(String key) { + this.key = key; + return this; + } + + public Client build() { + return doBuild(project, credentials()); + } + + private GoogleCredentials credentials() { + if (key == null) { + return null; + } + + Objects.requireNonNull(keySource, "Key source must be defined to build a GCP client"); + Objects.requireNonNull(project, "Project must be defined to build a GCP client"); + + InputStream credentialsStream; + switch (keySource) { + case JSON: + credentialsStream = new ByteArrayInputStream(key.getBytes(StandardCharsets.UTF_8)); + break; + case FILE: + try { + logger.debug("Attempting to open file {} for service account json key", key); + 
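// Illustrative sketch in comment form (assumes only the API shown in this file):
// callers elsewhere in this diff obtain clients through the concrete builders, e.g.
//   BigQuery bigQuery = new GcpClientBuilder.BigQueryBuilder().withConfig(config).build();
//   Storage gcs = new GcpClientBuilder.GcsBuilder().withConfig(config).build();
// Both builders share the project/key/keySource handling in this class; only doBuild() differs.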
credentialsStream = new FileInputStream(key); + } catch (IOException e) { + throw new BigQueryConnectException("Failed to access JSON key file", e); + } + break; + default: + throw new IllegalArgumentException("Unexpected value for KeySource enum: " + keySource); + } + + try { + return GoogleCredentials.fromStream(credentialsStream); + } catch (IOException e) { + throw new BigQueryConnectException("Failed to create credentials from input stream", e); + } + } + + protected abstract Client doBuild(String project, GoogleCredentials credentials); + + public static class BigQueryBuilder extends GcpClientBuilder { + @Override + protected BigQuery doBuild(String project, GoogleCredentials credentials) { + BigQueryOptions.Builder builder = BigQueryOptions.newBuilder() + .setProjectId(project); + + if (credentials != null) { + builder.setCredentials(credentials); + } else { + logger.debug("Attempting to access BigQuery without authentication"); + } + + return builder.build().getService(); + } + } + + public static class GcsBuilder extends GcpClientBuilder { + @Override + protected Storage doBuild(String project, GoogleCredentials credentials) { + StorageOptions.Builder builder = StorageOptions.newBuilder() + .setProjectId(project); + + if (credentials != null) { + builder.setCredentials(credentials); + } else { + logger.debug("Attempting to access GCS without authentication"); + } + + return builder.build().getService(); + } + } +} diff --git a/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/SchemaManager.java b/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/SchemaManager.java index 8d94ab61c..21d5a1ec9 100644 --- a/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/SchemaManager.java +++ b/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/SchemaManager.java @@ -1,3 +1,22 @@ +/* + * Copyright 2020 Confluent, Inc. + * + * This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + package com.wepay.kafka.connect.bigquery; diff --git a/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/config/BigQuerySinkConfig.java b/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/config/BigQuerySinkConfig.java index d4362c381..6f2d05446 100644 --- a/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/config/BigQuerySinkConfig.java +++ b/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/config/BigQuerySinkConfig.java @@ -1,7 +1,7 @@ -package com.wepay.kafka.connect.bigquery.config; - /* - * Copyright 2016 WePay, Inc. + * Copyright 2020 Confluent, Inc. + * + * This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,9 +17,11 @@ * under the License. 
*/ +package com.wepay.kafka.connect.bigquery.config; import com.google.cloud.bigquery.Schema; +import com.wepay.kafka.connect.bigquery.GcpClientBuilder; import com.wepay.kafka.connect.bigquery.api.SchemaRetriever; import com.wepay.kafka.connect.bigquery.convert.BigQueryRecordConverter; @@ -28,13 +30,14 @@ import com.wepay.kafka.connect.bigquery.convert.SchemaConverter; import org.apache.kafka.common.config.AbstractConfig; +import org.apache.kafka.common.config.Config; +import org.apache.kafka.common.config.ConfigValue; import org.apache.kafka.common.config.types.Password; import org.apache.kafka.common.config.ConfigDef; import org.apache.kafka.common.config.ConfigException; +import org.apache.kafka.connect.errors.ConnectException; import org.apache.kafka.connect.sink.SinkConnector; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import java.lang.reflect.Constructor; import java.lang.reflect.InvocationTargetException; @@ -45,18 +48,17 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.function.Function; import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.Optional; +import java.util.stream.Collectors; +import java.util.stream.Stream; /** * Base class for connector and task configs; contains properties shared between the two of them. */ public class BigQuerySinkConfig extends AbstractConfig { - private static final ConfigDef config; - private static final Validator validator = new Validator(); - private static final Logger logger = LoggerFactory.getLogger(BigQuerySinkConfig.class); - // Values taken from https://github.com/apache/kafka/blob/1.1.1/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/SinkConnectorConfig.java#L33 public static final String TOPICS_CONFIG = SinkConnector.TOPICS_CONFIG; private static final ConfigDef.Type TOPICS_TYPE = ConfigDef.Type.LIST; @@ -118,6 +120,7 @@ public class BigQuerySinkConfig extends AbstractConfig { private static final ConfigDef.Importance TOPICS_TO_TABLES_IMPORTANCE = ConfigDef.Importance.MEDIUM; public static final Object TOPICS_TO_TABLES_DEFAULT = null; + private static final ConfigDef.Validator TOPICS_TO_TABLES_VALIDATOR = new MapValidator(); private static final String TOPICS_TO_TABLES_DOC = "A list of mappings from topic regexes to table names. Note the regex must include " + "capture groups that are referenced in the format string using placeholders (i.e. 
$1) " @@ -132,7 +135,7 @@ public class BigQuerySinkConfig extends AbstractConfig { public static final String DATASETS_CONFIG = "datasets"; private static final ConfigDef.Type DATASETS_TYPE = ConfigDef.Type.LIST; private static final Object DATASETS_DEFAULT = ConfigDef.NO_DEFAULT_VALUE; - private static final ConfigDef.Validator DATASETS_VALIDATOR = validator; + private static final ConfigDef.Validator DATASETS_VALIDATOR = new MapValidator(); private static final ConfigDef.Importance DATASETS_IMPORTANCE = ConfigDef.Importance.HIGH; private static final String DATASETS_DOC = "Names for the datasets kafka topics will write to " @@ -155,9 +158,13 @@ public class BigQuerySinkConfig extends AbstractConfig { public static final String KEY_SOURCE_CONFIG = "keySource"; private static final ConfigDef.Type KEY_SOURCE_TYPE = ConfigDef.Type.STRING; - public static final String KEY_SOURCE_DEFAULT = "FILE"; - private static final ConfigDef.Validator KEY_SOURCE_VALIDATOR = - ConfigDef.ValidString.in("FILE", "JSON"); + public static final String KEY_SOURCE_DEFAULT = GcpClientBuilder.KeySource.FILE.name(); + private static final ConfigDef.Validator KEY_SOURCE_VALIDATOR = ConfigDef.ValidString.in( + Stream.of(GcpClientBuilder.KeySource.values()) + .map(GcpClientBuilder.KeySource::name) + .collect(Collectors.toList()) + .toArray(new String[0]) + ); private static final ConfigDef.Importance KEY_SOURCE_IMPORTANCE = ConfigDef.Importance.MEDIUM; private static final String KEY_SOURCE_DOC = "Determines whether the keyfile config is the path to the credentials json, or the json itself"; @@ -187,6 +194,7 @@ public class BigQuerySinkConfig extends AbstractConfig { public static final String KAFKA_KEY_FIELD_NAME_CONFIG = "kafkaKeyFieldName"; private static final ConfigDef.Type KAFKA_KEY_FIELD_NAME_TYPE = ConfigDef.Type.STRING; public static final String KAFKA_KEY_FIELD_NAME_DEFAULT = null; + private static final ConfigDef.Validator KAFKA_KEY_FIELD_NAME_VALIDATOR = new ConfigDef.NonEmptyString(); private static final ConfigDef.Importance KAFKA_KEY_FIELD_NAME_IMPORTANCE = ConfigDef.Importance.LOW; private static final String KAFKA_KEY_FIELD_NAME_DOC = "The name of the field of Kafka key. " + "Default to be null, which means Kafka Key Field will not be included."; @@ -194,6 +202,7 @@ public class BigQuerySinkConfig extends AbstractConfig { public static final String KAFKA_DATA_FIELD_NAME_CONFIG = "kafkaDataFieldName"; private static final ConfigDef.Type KAFKA_DATA_FIELD_NAME_TYPE = ConfigDef.Type.STRING; public static final String KAFKA_DATA_FIELD_NAME_DEFAULT = null; + private static final ConfigDef.Validator KAFKA_DATA_FIELD_NAME_VALIDATOR = new ConfigDef.NonEmptyString(); private static final ConfigDef.Importance KAFKA_DATA_FIELD_NAME_IMPORTANCE = ConfigDef.Importance.LOW; private static final String KAFKA_DATA_FIELD_NAME_DOC = "The name of the field of Kafka Data. " + "Default to be null, which means Kafka Data Field will not be included. 
"; @@ -233,8 +242,123 @@ public class BigQuerySinkConfig extends AbstractConfig { private static final String TABLE_CREATE_DOC = "Automatically create BigQuery tables if they don't already exist"; - static { - config = new ConfigDef() + public static final String SCHEMA_UPDATE_CONFIG = "autoUpdateSchemas"; + private static final ConfigDef.Type SCHEMA_UPDATE_TYPE = ConfigDef.Type.BOOLEAN; + public static final Boolean SCHEMA_UPDATE_DEFAULT = false; + private static final ConfigDef.Importance SCHEMA_UPDATE_IMPORTANCE = ConfigDef.Importance.HIGH; + private static final String SCHEMA_UPDATE_DOC = + "Whether or not to automatically update BigQuery schemas"; + + public static final String THREAD_POOL_SIZE_CONFIG = "threadPoolSize"; + private static final ConfigDef.Type THREAD_POOL_SIZE_TYPE = ConfigDef.Type.INT; + public static final Integer THREAD_POOL_SIZE_DEFAULT = 10; + private static final ConfigDef.Validator THREAD_POOL_SIZE_VALIDATOR = ConfigDef.Range.atLeast(1); + private static final ConfigDef.Importance THREAD_POOL_SIZE_IMPORTANCE = + ConfigDef.Importance.MEDIUM; + private static final String THREAD_POOL_SIZE_DOC = + "The size of the BigQuery write thread pool. This establishes the maximum number of " + + "concurrent writes to BigQuery."; + + public static final String QUEUE_SIZE_CONFIG = "queueSize"; + private static final ConfigDef.Type QUEUE_SIZE_TYPE = ConfigDef.Type.LONG; + // should this even have a default? + public static final Long QUEUE_SIZE_DEFAULT = -1L; + private static final ConfigDef.Validator QUEUE_SIZE_VALIDATOR = ConfigDef.Range.atLeast(-1); + private static final ConfigDef.Importance QUEUE_SIZE_IMPORTANCE = ConfigDef.Importance.HIGH; + private static final String QUEUE_SIZE_DOC = + "The maximum size (or -1 for no maximum size) of the worker queue for bigQuery write " + + "requests before all topics are paused. This is a soft limit; the size of the queue can " + + "go over this before topics are paused. 
All topics will be resumed once a flush is " + + "requested or the size of the queue drops under half of the maximum size."; + + public static final String BIGQUERY_RETRY_CONFIG = "bigQueryRetry"; + private static final ConfigDef.Type BIGQUERY_RETRY_TYPE = ConfigDef.Type.INT; + public static final Integer BIGQUERY_RETRY_DEFAULT = 0; + private static final ConfigDef.Validator BIGQUERY_RETRY_VALIDATOR = ConfigDef.Range.atLeast(0); + private static final ConfigDef.Importance BIGQUERY_RETRY_IMPORTANCE = + ConfigDef.Importance.MEDIUM; + private static final String BIGQUERY_RETRY_DOC = + "The number of retry attempts that will be made per BigQuery request that fails with a " + + "backend error or a quota exceeded error"; + + public static final String BIGQUERY_RETRY_WAIT_CONFIG = "bigQueryRetryWait"; + private static final ConfigDef.Type BIGQUERY_RETRY_WAIT_CONFIG_TYPE = ConfigDef.Type.LONG; + public static final Long BIGQUERY_RETRY_WAIT_DEFAULT = 1000L; + private static final ConfigDef.Validator BIGQUERY_RETRY_WAIT_VALIDATOR = + ConfigDef.Range.atLeast(0); + private static final ConfigDef.Importance BIGQUERY_RETRY_WAIT_IMPORTANCE = + ConfigDef.Importance.MEDIUM; + private static final String BIGQUERY_RETRY_WAIT_DOC = + "The minimum amount of time, in milliseconds, to wait between BigQuery backend or quota " + + "exceeded error retry attempts."; + + public static final String BIGQUERY_MESSAGE_TIME_PARTITIONING_CONFIG = + "bigQueryMessageTimePartitioning"; + private static final ConfigDef.Type BIGQUERY_MESSAGE_TIME_PARTITIONING_CONFIG_TYPE = + ConfigDef.Type.BOOLEAN; + public static final Boolean BIGQUERY_MESSAGE_TIME_PARTITIONING_DEFAULT = false; + private static final ConfigDef.Importance BIGQUERY_MESSAGE_TIME_PARTITIONING_IMPORTANCE = + ConfigDef.Importance.HIGH; + private static final String BIGQUERY_MESSAGE_TIME_PARTITIONING_DOC = + "Whether or not to use the message time when inserting records. " + + "Default uses the connector processing time."; + + public static final String BIGQUERY_PARTITION_DECORATOR_CONFIG = + "bigQueryPartitionDecorator"; + private static final ConfigDef.Type BIGQUERY_PARTITION_DECORATOR_CONFIG_TYPE = + ConfigDef.Type.BOOLEAN; + //This has been set to true to preserve the existing behavior. However, we can set it to false if field based partitioning is used in BigQuery + public static final Boolean BIGQUERY_PARTITION_DECORATOR_DEFAULT = true; + private static final ConfigDef.Importance BIGQUERY_PARTITION_DECORATOR_IMPORTANCE = + ConfigDef.Importance.HIGH; + private static final String BIGQUERY_PARTITION_DECORATOR_DOC = + "Whether or not to append partition decorator to BigQuery table name when inserting records. " + + "Default is true. Setting this to true appends partition decorator to table name (e.g. table$yyyyMMdd depending on the configuration set for bigQueryPartitionDecorator). 
" + + "Setting this to false bypasses the logic to append the partition decorator and uses raw table name for inserts."; + + public static final String BIGQUERY_TIMESTAMP_PARTITION_FIELD_NAME_CONFIG = "timestampPartitionFieldName"; + private static final ConfigDef.Type BIGQUERY_TIMESTAMP_PARTITION_FIELD_NAME_TYPE = ConfigDef.Type.STRING; + private static final String BIGQUERY_TIMESTAMP_PARTITION_FIELD_NAME_DEFAULT = null; + private static final ConfigDef.Validator BIGQUERY_TIMESTAMP_PARTITION_FIELD_NAME_VALIDATOR = new ConfigDef.NonEmptyString(); + private static final ConfigDef.Importance BIGQUERY_TIMESTAMP_PARTITION_FIELD_NAME_IMPORTANCE = + ConfigDef.Importance.LOW; + private static final String BIGQUERY_TIMESTAMP_PARTITION_FIELD_NAME_DOC = + "The name of the field in the value that contains the timestamp to partition by in BigQuery" + + " and enable timestamp partitioning for each table. Leave this configuration blank," + + " to enable ingestion time partitioning for each table."; + + public static final String BIGQUERY_CLUSTERING_FIELD_NAMES_CONFIG = "clusteringPartitionFieldNames"; + private static final ConfigDef.Type BIGQUERY_CLUSTERING_FIELD_NAMES_TYPE = ConfigDef.Type.LIST; + private static final List BIGQUERY_CLUSTERING_FIELD_NAMES_DEFAULT = null; + private static final ConfigDef.Validator BIGQUERY_CLUSTERING_FIELD_NAMES_VALIDATOR = (name, value) -> { + if (value == null) { + return; + } + + @SuppressWarnings("unchecked") + List parsedValue = (List) value; + if (parsedValue.size() > 4) { + throw new ConfigException(name, value, "You may only specify up to four clustering field names."); + } + }; + private static final ConfigDef.Importance BIGQUERY_CLUSTERING_FIELD_NAMES_IMPORTANCE = + ConfigDef.Importance.LOW; + private static final String BIGQUERY_CLUSTERING_FIELD_NAMES_DOC = + "List of fields on which data should be clustered by in BigQuery, separated by commas"; + + public static final String CONVERT_DEBEZIUM_TIMESTAMP_TO_INTEGER_CONFIG = "convertDebeziumTimestampToInteger"; + private static final ConfigDef.Type CONVERT_DEBEZIUM_TIMESTAMP_TO_INTEGER_TYPE = ConfigDef.Type.BOOLEAN; + private static final Boolean CONVERT_DEBEZIUM_TIMESTAMP_TO_INTEGER_DEFAULT = false; + private static final ConfigDef.Importance CONVERT_DEBEZIUM_TIMESTAMP_TO_INTEGER_IMPORTANCE = + ConfigDef.Importance.MEDIUM; + + /** + * Return the ConfigDef object used to define this config's fields. + * + * @return The ConfigDef object used to define this config's fields. 
+ */ + public static ConfigDef getConfig() { + return new ConfigDef() .define( TOPICS_CONFIG, TOPICS_TYPE, @@ -283,6 +407,7 @@ public class BigQuerySinkConfig extends AbstractConfig { TOPICS_TO_TABLES_CONFIG, TOPICS_TO_TABLES_TYPE, TOPICS_TO_TABLES_DEFAULT, + TOPICS_TO_TABLES_VALIDATOR, TOPICS_TO_TABLES_IMPORTANCE, TOPICS_TO_TABLES_DOC ).define( @@ -332,12 +457,14 @@ public class BigQuerySinkConfig extends AbstractConfig { KAFKA_KEY_FIELD_NAME_CONFIG, KAFKA_KEY_FIELD_NAME_TYPE, KAFKA_KEY_FIELD_NAME_DEFAULT, + KAFKA_KEY_FIELD_NAME_VALIDATOR, KAFKA_KEY_FIELD_NAME_IMPORTANCE, KAFKA_KEY_FIELD_NAME_DOC ).define( KAFKA_DATA_FIELD_NAME_CONFIG, KAFKA_DATA_FIELD_NAME_TYPE, KAFKA_DATA_FIELD_NAME_DEFAULT, + KAFKA_DATA_FIELD_NAME_VALIDATOR, KAFKA_DATA_FIELD_NAME_IMPORTANCE, KAFKA_DATA_FIELD_NAME_DOC ).define( @@ -365,125 +492,200 @@ public class BigQuerySinkConfig extends AbstractConfig { TABLE_CREATE_DEFAULT, TABLE_CREATE_IMPORTANCE, TABLE_CREATE_DOC + ).define( + SCHEMA_UPDATE_CONFIG, + SCHEMA_UPDATE_TYPE, + SCHEMA_UPDATE_DEFAULT, + SCHEMA_UPDATE_IMPORTANCE, + SCHEMA_UPDATE_DOC + ).define( + THREAD_POOL_SIZE_CONFIG, + THREAD_POOL_SIZE_TYPE, + THREAD_POOL_SIZE_DEFAULT, + THREAD_POOL_SIZE_VALIDATOR, + THREAD_POOL_SIZE_IMPORTANCE, + THREAD_POOL_SIZE_DOC + ).define( + QUEUE_SIZE_CONFIG, + QUEUE_SIZE_TYPE, + QUEUE_SIZE_DEFAULT, + QUEUE_SIZE_VALIDATOR, + QUEUE_SIZE_IMPORTANCE, + QUEUE_SIZE_DOC + ).define( + BIGQUERY_RETRY_CONFIG, + BIGQUERY_RETRY_TYPE, + BIGQUERY_RETRY_DEFAULT, + BIGQUERY_RETRY_VALIDATOR, + BIGQUERY_RETRY_IMPORTANCE, + BIGQUERY_RETRY_DOC + ).define( + BIGQUERY_RETRY_WAIT_CONFIG, + BIGQUERY_RETRY_WAIT_CONFIG_TYPE, + BIGQUERY_RETRY_WAIT_DEFAULT, + BIGQUERY_RETRY_WAIT_VALIDATOR, + BIGQUERY_RETRY_WAIT_IMPORTANCE, + BIGQUERY_RETRY_WAIT_DOC + ).define( + BIGQUERY_MESSAGE_TIME_PARTITIONING_CONFIG, + BIGQUERY_MESSAGE_TIME_PARTITIONING_CONFIG_TYPE, + BIGQUERY_MESSAGE_TIME_PARTITIONING_DEFAULT, + BIGQUERY_MESSAGE_TIME_PARTITIONING_IMPORTANCE, + BIGQUERY_MESSAGE_TIME_PARTITIONING_DOC + ).define( + BIGQUERY_PARTITION_DECORATOR_CONFIG, + BIGQUERY_PARTITION_DECORATOR_CONFIG_TYPE, + BIGQUERY_PARTITION_DECORATOR_DEFAULT, + BIGQUERY_PARTITION_DECORATOR_IMPORTANCE, + BIGQUERY_PARTITION_DECORATOR_DOC + ).define( + BIGQUERY_TIMESTAMP_PARTITION_FIELD_NAME_CONFIG, + BIGQUERY_TIMESTAMP_PARTITION_FIELD_NAME_TYPE, + BIGQUERY_TIMESTAMP_PARTITION_FIELD_NAME_DEFAULT, + BIGQUERY_TIMESTAMP_PARTITION_FIELD_NAME_VALIDATOR, + BIGQUERY_TIMESTAMP_PARTITION_FIELD_NAME_IMPORTANCE, + BIGQUERY_TIMESTAMP_PARTITION_FIELD_NAME_DOC + ).define( + BIGQUERY_CLUSTERING_FIELD_NAMES_CONFIG, + BIGQUERY_CLUSTERING_FIELD_NAMES_TYPE, + BIGQUERY_CLUSTERING_FIELD_NAMES_DEFAULT, + BIGQUERY_CLUSTERING_FIELD_NAMES_VALIDATOR, + BIGQUERY_CLUSTERING_FIELD_NAMES_IMPORTANCE, + BIGQUERY_CLUSTERING_FIELD_NAMES_DOC + ).defineInternal( + CONVERT_DEBEZIUM_TIMESTAMP_TO_INTEGER_CONFIG, + CONVERT_DEBEZIUM_TIMESTAMP_TO_INTEGER_TYPE, + CONVERT_DEBEZIUM_TIMESTAMP_TO_INTEGER_DEFAULT, + CONVERT_DEBEZIUM_TIMESTAMP_TO_INTEGER_IMPORTANCE ); } - /** - * Throw an exception if the passed-in properties do not constitute a valid sink. 
- * @param props sink configuration properties - */ - public static void validate(Map props) { - final boolean hasTopicsConfig = hasTopicsConfig(props); - final boolean hasTopicsRegexConfig = hasTopicsRegexConfig(props); - - if (hasTopicsConfig && hasTopicsRegexConfig) { - throw new ConfigException(TOPICS_CONFIG + " and " + TOPICS_REGEX_CONFIG + - " are mutually exclusive options, but both are set."); - } - if (!hasTopicsConfig && !hasTopicsRegexConfig) { - throw new ConfigException("Must configure one of " + - TOPICS_CONFIG + " or " + TOPICS_REGEX_CONFIG); - } - } + private static final List> MULTI_PROPERTY_VALIDATIONS = new ArrayList<>(); - public static boolean hasTopicsConfig(Map props) { - String topicsStr = props.get(TOPICS_CONFIG); - return topicsStr != null && !topicsStr.trim().isEmpty(); - } + static { + // Note that order matters here: validations are performed in the order they're added to this list, and if a + // property or any of the properties that it depends on has an error, validation for it gets skipped. + // This comes in handy for things like checking for the existence of tables, which requires valid BigQuery + // credentials. We validate those credentials before checking for tables so that we can safely assume while + // checking for those tables that the credentials are already valid. + MULTI_PROPERTY_VALIDATIONS.add(new CredentialsValidator.BigQueryCredentialsValidator()); + MULTI_PROPERTY_VALIDATIONS.add(new CredentialsValidator.GcsCredentialsValidator()); + MULTI_PROPERTY_VALIDATIONS.add(new TableExistenceValidator()); + MULTI_PROPERTY_VALIDATIONS.add(new SchemaRetrieverValidator.TableCreationValidator()); + MULTI_PROPERTY_VALIDATIONS.add(new SchemaRetrieverValidator.SchemaUpdateValidator()); + MULTI_PROPERTY_VALIDATIONS.add(new GcsBucketValidator()); + MULTI_PROPERTY_VALIDATIONS.add(new PartitioningModeValidator()); + } - public static boolean hasTopicsRegexConfig(Map props) { - String topicsRegexStr = props.get(TOPICS_REGEX_CONFIG); - return topicsRegexStr != null && !topicsRegexStr.trim().isEmpty(); + /** + * Used in conjunction with {@link com.wepay.kafka.connect.bigquery.BigQuerySinkConnector#validate(Map)} to perform + * preflight configuration checks. Simple validations that only require a single property value at a time (such as + * ensuring that boolean properties only contain true/false values, or that values for required properties are + * provided) are handled automatically by the {@link #getConfig() ConfigDef} for this class and optionally-defined + * custom {@link ConfigDef.Validator validators}. Other, more sophisticated validations that require multiple + * property values at a time (such as checking if all of the tables the connector will write to already exist if + * automatic table creation is disabled) are performed manually in a subsequent step. 
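A minimal sketch of how a caller might drive this two-phase validation, assuming the supplied properties parse cleanly enough for construction (names illustrative):

```java
// Hypothetical preflight check from the connector side
BigQuerySinkConfig config = new BigQuerySinkConfig(props);  // per-property ConfigDef checks
Config result = config.validate();                          // multi-property validators run here
result.configValues().stream()
    .filter(value -> !value.errorMessages().isEmpty())
    .forEach(value -> System.out.println(value.name() + ": " + value.errorMessages()));
config.ensureValid();  // throws ConnectException if any property still has errors
```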
+ * + * @return a {@link Config} object containing all errors that the connector was able to detect during preflight + * validation of this configuration; never null + */ + public Config validate() { + List initialValidation = getConfig().validate(originalsStrings()); + Map valuesByName = initialValidation + .stream() + .collect(Collectors.toMap(ConfigValue::name, Function.identity())); + MULTI_PROPERTY_VALIDATIONS.forEach(validator -> { + ConfigValue value = valuesByName.get(validator.propertyName()); + validator.validate(value, this, valuesByName).ifPresent(value::addErrorMessage); + }); + return new Config(initialValidation); + } + + /** + * Ensure that this config is valid (including multi-property validations performed in {@link #validate()}, and if any errors + * are detected, throw an exception. + * @throws ConnectException if this config is invalid + */ + public void ensureValid() { + Config config = validate(); + List errors = config.configValues().stream() + .filter(v -> !v.errorMessages().isEmpty()) + .map(v -> "For property '" + v.name() + "': " + String.join(",", v.errorMessages())) + .collect(Collectors.toList()); + if (!errors.isEmpty()) { + throw new ConnectException( + "The connector config is invalid and contains the following errors:\n" + + String.join("\n", errors) + ); } + } - @SuppressWarnings("unchecked") - public static class Validator implements ConfigDef.Validator { + public static class MapValidator implements ConfigDef.Validator { @Override public void ensureValid(String name, Object value) { - switch (name) { - case DATASETS_CONFIG: - ensureValidMap(name, (List) value); - break; - case TOPICS_TO_TABLES_CONFIG: - ensureValidMap(name, (List) value); - break; - default: - break; - } - } - - protected static void ensureValidMap(String name, List values) { - if (values == null) { + if (value == null) { return; } - values.forEach((entry) -> parseMapping(entry, name)); + + @SuppressWarnings("unchecked") + List parsedValue = (List) value; + + parsedValue.forEach(BigQuerySinkConfig::parseMapping); } - /** - * Ensures the mapping given is valid, then returns an entry containing its key and value. - * Checks to make sure that the given String adheres to the specified format, and throws - * an exception if it does not. Trims leading and trailing whitespace, and then checks to make - * sure that both Strings are still non-empty. - * - * @param mapping The mapping to parse (should be of the form <key>=<value>) - * @param name The name of the field. Used in error messages. - * @return A Map.Entry containing the parsed key/value pair. - */ - protected static Map.Entry parseMapping(String mapping, String name) { - String[] keyValue = mapping.split("="); - if (keyValue.length != 2) { - throw new ConfigException( - "Invalid mapping for " + name - + " property: '" + mapping - + "' (must follow format '=')" - ); - } + @Override + public String toString() { + return "A list of key-value pairs in the format =, =, ..."; + } + } - String key = keyValue[0].trim(); - if (key.isEmpty()) { - throw new ConfigException( - "Empty key found in mapping '" + mapping - + "' for " + name + " property" - ); - } + /** + * Ensures the mapping given is valid, then returns an entry containing its key and value. + * Checks to make sure that the given String adheres to the specified format, and throws + * an exception if it does not. Trims leading and trailing whitespace, and then checks to make + * sure that both Strings are still non-empty. 
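The parsing rules described below, with invented example values:

```java
// Callable from within the com.wepay.kafka.connect.bigquery.config package
Map.Entry<String, String> entry = BigQuerySinkConfig.parseMapping("kcbq-topic=my_dataset");
// entry.getKey()   -> "kcbq-topic"
// entry.getValue() -> "my_dataset"

// Each of the following throws ConfigException:
//   parseMapping("no-delimiter")  (must follow format '<key>=<value>')
//   parseMapping("=my_dataset")   (key cannot be empty)
//   parseMapping("kcbq-topic= ")  (value cannot be empty)
```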
+ * + * @param mapping The mapping to parse (should be of the form <key>=<value>) + * @return A Map.Entry containing the parsed key/value pair. + */ + static Map.Entry<String, String> parseMapping(String mapping) { + String[] keyValue = mapping.split("="); + if (keyValue.length != 2) { + throw new ConfigException("Invalid mapping '" + mapping + "' (must follow format '<key>=<value>')"); + } - String key = keyValue[0].trim(); - if (key.isEmpty()) { - throw new ConfigException( - "Empty key found in mapping '" + mapping - + "' for " + name + " property" - ); - } + String key = keyValue[0].trim(); + if (key.isEmpty()) { + throw new ConfigException("Invalid mapping '" + mapping + "' (key cannot be empty)"); + } - String value = keyValue[1].trim(); - if (value.isEmpty()) { - throw new ConfigException( - "Empty value found in mapping '" + mapping - + "' for " + name + " property" - ); - } + String value = keyValue[1].trim(); + if (value.isEmpty()) { + throw new ConfigException("Invalid mapping '" + mapping + "' (value cannot be empty)"); + } - return new AbstractMap.SimpleEntry<>(key, value); + + return new AbstractMap.SimpleEntry<>(key, value); } /** - * Returns the keyfile + * @return the key, which is (depending on the key source property) either a path to a file or a raw JSON string */ - public String getKeyFile() { + public String getKey() { return Optional.ofNullable(getPassword(KEYFILE_CONFIG)).map(Password::value).orElse(null); } /** - * Parses a config map, which must be provided as a list of Strings of the form - * '<key>=<value>' into a Map. Locates that list, splits its key and value pairs, and - * returns they Map they represent. - * - * @param name The name of the property the mapping is given for. Used in exception messages. - * @return A Map containing the given key and value pairs. + * @return the {@link com.wepay.kafka.connect.bigquery.GcpClientBuilder.KeySource key source type} that dictates how + * the {@link #getKey()} should be interpreted */ - public Map<String, String> getMap(String name) { - List<String> assocList = getList(name); - Map<String, String> configMap = new HashMap<>(); - if (assocList != null) { - for (String mapping : assocList) { - Map.Entry<String, String> entry = validator.parseMapping(mapping, name); - configMap.put(entry.getKey(), entry.getValue()); - } + public GcpClientBuilder.KeySource getKeySource() { + String rawKeySource = getString(KEY_SOURCE_CONFIG); + try { + return GcpClientBuilder.KeySource.valueOf(rawKeySource); + } catch (IllegalArgumentException e) { + // Should never happen with preflight validation of the key source property + throw new ConnectException("Invalid key source type: " + rawKeySource); } - return configMap; } /** @@ -498,7 +700,7 @@ public List<Map.Entry<Pattern, String>> getSinglePatterns(String property) { List<Map.Entry<Pattern, String>> patternList = new ArrayList<>(); if (propList != null) { for (String propValue : propList) { - Map.Entry<String, String> mapping = validator.parseMapping(propValue, property); + Map.Entry<String, String> mapping = parseMapping(propValue); Pattern propPattern = Pattern.compile(mapping.getKey()); Map.Entry<Pattern, String> patternEntry = new AbstractMap.SimpleEntry<>(propPattern, mapping.getValue()); @@ -589,7 +791,7 @@ public SchemaConverter<Schema> getSchemaConverter() { * @return a {@link RecordConverter} for BigQuery. 
*/ public RecordConverter> getRecordConverter() { - return new BigQueryRecordConverter(getBoolean(CONVERT_DOUBLE_SPECIAL_VALUES_CONFIG)); + return new BigQueryRecordConverter(getBoolean(CONVERT_DOUBLE_SPECIAL_VALUES_CONFIG), getBoolean(CONVERT_DEBEZIUM_TIMESTAMP_TO_INTEGER_CONFIG)); } /** @@ -620,7 +822,7 @@ public SchemaRetriever getSchemaRetriever() { Class schemaRetrieverClass = userSpecifiedClass.asSubclass(SchemaRetriever.class); - Constructor schemaRetrieverConstructor = null; + Constructor schemaRetrieverConstructor; try { schemaRetrieverConstructor = schemaRetrieverClass.getConstructor(); } catch (NoSuchMethodException nsme) { @@ -630,7 +832,7 @@ public SchemaRetriever getSchemaRetriever() { ); } - SchemaRetriever schemaRetriever = null; + SchemaRetriever schemaRetriever; try { schemaRetriever = schemaRetrieverConstructor.newInstance(); } catch (InstantiationException @@ -649,7 +851,6 @@ public SchemaRetriever getSchemaRetriever() { } /** - * * If the connector is configured to load Kafka data into BigQuery, this config defines * the name of the kafka data field. A structure is created under the field name to contain * kafka data schema including topic, offset, partition and insertTime. @@ -661,7 +862,6 @@ public Optional getKafkaKeyFieldName() { } /** - * * If the connector is configured to load Kafka keys into BigQuery, this config defines * the name of the kafka key field. A structure is created under the field name to contain * a topic's Kafka key schema. @@ -673,47 +873,29 @@ public Optional getKafkaDataFieldName() { } /** - * Verifies that a bucket is specified if GCS batch loading is enabled. - * @throws ConfigException Exception thrown if no bucket is specified and batch loading is on. + * Returns the field name to use for timestamp partitioning. + * @return String that represents the field name. */ - private void verifyBucketSpecified() throws ConfigException { - // Throw an exception if GCS Batch loading will be used but no bucket is specified - if (getString(GCS_BUCKET_NAME_CONFIG).equals("") - && !getList(ENABLE_BATCH_CONFIG).isEmpty()) { - throw new ConfigException("Batch loading enabled for some topics, but no bucket specified"); - } - } - - private void checkAutoCreateTables() { - - Class schemaRetriever = getClass(BigQuerySinkConfig.SCHEMA_RETRIEVER_CONFIG); - boolean autoCreateTables = getBoolean(TABLE_CREATE_CONFIG); - - if (autoCreateTables && schemaRetriever == null) { - throw new ConfigException( - "Cannot specify automatic table creation without a schema retriever" - ); - } + public Optional getTimestampPartitionFieldName() { + return Optional.ofNullable(getString(BIGQUERY_TIMESTAMP_PARTITION_FIELD_NAME_CONFIG)); } /** - * Return the ConfigDef object used to define this config's fields. - * - * @return The ConfigDef object used to define this config's fields. + * Returns the field names to use for clustering. + * @return List of Strings that represent the field names. 
*/ - public static ConfigDef getConfig() { - return config; + public Optional> getClusteringPartitionFieldNames() { + return Optional + .ofNullable(getList(BIGQUERY_CLUSTERING_FIELD_NAMES_CONFIG)) + // With Java 11 there's Predicate::not, but for now we have to just manually invert the isEmpty check + .filter(l -> !l.isEmpty()); } protected BigQuerySinkConfig(ConfigDef config, Map properties) { super(config, properties); - verifyBucketSpecified(); } public BigQuerySinkConfig(Map properties) { - super(config, properties); - verifyBucketSpecified(); - checkAutoCreateTables(); + this(getConfig(), properties); } - } diff --git a/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/config/BigQuerySinkTaskConfig.java b/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/config/BigQuerySinkTaskConfig.java index b2e9fc19f..9a72561be 100644 --- a/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/config/BigQuerySinkTaskConfig.java +++ b/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/config/BigQuerySinkTaskConfig.java @@ -1,7 +1,7 @@ -package com.wepay.kafka.connect.bigquery.config; - /* - * Copyright 2016 WePay, Inc. + * Copyright 2020 Confluent, Inc. + * + * This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,14 +17,9 @@ * under the License. */ +package com.wepay.kafka.connect.bigquery.config; -import java.util.List; -import java.util.Optional; import org.apache.kafka.common.config.ConfigDef; -import org.apache.kafka.common.config.ConfigException; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import java.util.Map; @@ -32,238 +27,21 @@ * Class for task-specific configuration properties. */ public class BigQuerySinkTaskConfig extends BigQuerySinkConfig { - private static final ConfigDef config; - private static final Logger logger = LoggerFactory.getLogger(BigQuerySinkTaskConfig.class); - - public static final String SCHEMA_UPDATE_CONFIG = "autoUpdateSchemas"; - private static final ConfigDef.Type SCHEMA_UPDATE_TYPE = ConfigDef.Type.BOOLEAN; - public static final Boolean SCHEMA_UPDATE_DEFAULT = false; - private static final ConfigDef.Importance SCHEMA_UPDATE_IMPORTANCE = ConfigDef.Importance.HIGH; - private static final String SCHEMA_UPDATE_DOC = - "Whether or not to automatically update BigQuery schemas"; - - public static final String THREAD_POOL_SIZE_CONFIG = "threadPoolSize"; - private static final ConfigDef.Type THREAD_POOL_SIZE_TYPE = ConfigDef.Type.INT; - public static final Integer THREAD_POOL_SIZE_DEFAULT = 10; - private static final ConfigDef.Validator THREAD_POOL_SIZE_VALIDATOR = ConfigDef.Range.atLeast(1); - private static final ConfigDef.Importance THREAD_POOL_SIZE_IMPORTANCE = - ConfigDef.Importance.MEDIUM; - private static final String THREAD_POOL_SIZE_DOC = - "The size of the BigQuery write thread pool. This establishes the maximum number of " - + "concurrent writes to BigQuery."; - - public static final String QUEUE_SIZE_CONFIG = "queueSize"; - private static final ConfigDef.Type QUEUE_SIZE_TYPE = ConfigDef.Type.LONG; - // should this even have a default? 
- public static final Long QUEUE_SIZE_DEFAULT = -1L; - private static final ConfigDef.Validator QUEUE_SIZE_VALIDATOR = ConfigDef.Range.atLeast(-1); - private static final ConfigDef.Importance QUEUE_SIZE_IMPORTANCE = ConfigDef.Importance.HIGH; - private static final String QUEUE_SIZE_DOC = - "The maximum size (or -1 for no maximum size) of the worker queue for bigQuery write " - + "requests before all topics are paused. This is a soft limit; the size of the queue can " - + "go over this before topics are paused. All topics will be resumed once a flush is " - + "requested or the size of the queue drops under half of the maximum size."; - - public static final String BIGQUERY_RETRY_CONFIG = "bigQueryRetry"; - private static final ConfigDef.Type BIGQUERY_RETRY_TYPE = ConfigDef.Type.INT; - public static final Integer BIGQUERY_RETRY_DEFAULT = 0; - private static final ConfigDef.Validator BIGQUERY_RETRY_VALIDATOR = ConfigDef.Range.atLeast(0); - private static final ConfigDef.Importance BIGQUERY_RETRY_IMPORTANCE = - ConfigDef.Importance.MEDIUM; - private static final String BIGQUERY_RETRY_DOC = - "The number of retry attempts that will be made per BigQuery request that fails with a " - + "backend error or a quota exceeded error"; - - public static final String BIGQUERY_RETRY_WAIT_CONFIG = "bigQueryRetryWait"; - private static final ConfigDef.Type BIGQUERY_RETRY_WAIT_CONFIG_TYPE = ConfigDef.Type.LONG; - public static final Long BIGQUERY_RETRY_WAIT_DEFAULT = 1000L; - private static final ConfigDef.Validator BIGQUERY_RETRY_WAIT_VALIDATOR = - ConfigDef.Range.atLeast(0); - private static final ConfigDef.Importance BIGQUERY_RETRY_WAIT_IMPORTANCE = - ConfigDef.Importance.MEDIUM; - private static final String BIGQUERY_RETRY_WAIT_DOC = - "The minimum amount of time, in milliseconds, to wait between BigQuery backend or quota " - + "exceeded error retry attempts."; - - public static final String BIGQUERY_MESSAGE_TIME_PARTITIONING_CONFIG = - "bigQueryMessageTimePartitioning"; - private static final ConfigDef.Type BIGQUERY_MESSAGE_TIME_PARTITIONING_CONFIG_TYPE = - ConfigDef.Type.BOOLEAN; - public static final Boolean BIGQUERY_MESSAGE_TIME_PARTITIONING_DEFAULT = false; - private static final ConfigDef.Importance BIGQUERY_MESSAGE_TIME_PARTITIONING_IMPORTANCE = - ConfigDef.Importance.HIGH; - private static final String BIGQUERY_MESSAGE_TIME_PARTITIONING_DOC = - "Whether or not to use the message time when inserting records. " - + "Default uses the connector processing time."; - - public static final String BIGQUERY_PARTITION_DECORATOR_CONFIG = - "bigQueryPartitionDecorator"; - private static final ConfigDef.Type BIGQUERY_PARTITION_DECORATOR_CONFIG_TYPE = - ConfigDef.Type.BOOLEAN; - //This has been set to true to preserve the existing behavior. However, we can set it to false if field based partitioning is used in BigQuery - public static final Boolean BIGQUERY_PARTITION_DECORATOR_DEFAULT = true; - private static final ConfigDef.Importance BIGQUERY_PARTITION_DECORATOR_IMPORTANCE = - ConfigDef.Importance.HIGH; - private static final String BIGQUERY_PARTITION_DECORATOR_DOC = - "Whether or not to append partition decorator to BigQuery table name when inserting records. " - + "Default is true. Setting this to true appends partition decorator to table name (e.g. table$yyyyMMdd depending on the configuration set for bigQueryPartitionDecorator). 
" - + "Setting this to false bypasses the logic to append the partition decorator and uses raw table name for inserts."; - - public static final String BIGQUERY_TIMESTAMP_PARTITION_FIELD_NAME_CONFIG = "timestampPartitionFieldName"; - private static final ConfigDef.Type BIGQUERY_TIMESTAMP_PARTITION_FIELD_NAME_TYPE = ConfigDef.Type.STRING; - private static final String BIGQUERY_TIMESTAMP_PARTITION_FIELD_NAME_DEFAULT = null; - private static final ConfigDef.Importance BIGQUERY_TIMESTAMP_PARTITION_FIELD_NAME_IMPORTANCE = - ConfigDef.Importance.LOW; - private static final String BIGQUERY_TIMESTAMP_PARTITION_FIELD_NAME_DOC = - "The name of the field in the value that contains the timestamp to partition by in BigQuery" - + " and enable timestamp partitioning for each table. Leave this configuration blank," - + " to enable ingestion time partitioning for each table."; - - public static final String BIGQUERY_CLUSTERING_FIELD_NAMES_CONFIG = "clusteringPartitionFieldNames"; - private static final ConfigDef.Type BIGQUERY_CLUSTERING_FIELD_NAMES_TYPE = ConfigDef.Type.LIST; - private static final List BIGQUERY_CLUSTERING_FIELD_NAMES_DEFAULT = null; - private static final ConfigDef.Importance BIGQUERY_CLUSTERING_FIELD_NAMES_IMPORTANCE = - ConfigDef.Importance.LOW; - private static final String BIGQUERY_CLUSTERING_FIELD_NAMES_DOC = - "List of fields on which data should be clustered by in BigQuery, separated by commas"; - - static { - config = BigQuerySinkConfig.getConfig() - .define( - SCHEMA_UPDATE_CONFIG, - SCHEMA_UPDATE_TYPE, - SCHEMA_UPDATE_DEFAULT, - SCHEMA_UPDATE_IMPORTANCE, - SCHEMA_UPDATE_DOC - ).define( - THREAD_POOL_SIZE_CONFIG, - THREAD_POOL_SIZE_TYPE, - THREAD_POOL_SIZE_DEFAULT, - THREAD_POOL_SIZE_VALIDATOR, - THREAD_POOL_SIZE_IMPORTANCE, - THREAD_POOL_SIZE_DOC - ).define( - QUEUE_SIZE_CONFIG, - QUEUE_SIZE_TYPE, - QUEUE_SIZE_DEFAULT, - QUEUE_SIZE_VALIDATOR, - QUEUE_SIZE_IMPORTANCE, - QUEUE_SIZE_DOC - ).define( - BIGQUERY_RETRY_CONFIG, - BIGQUERY_RETRY_TYPE, - BIGQUERY_RETRY_DEFAULT, - BIGQUERY_RETRY_VALIDATOR, - BIGQUERY_RETRY_IMPORTANCE, - BIGQUERY_RETRY_DOC - ).define( - BIGQUERY_RETRY_WAIT_CONFIG, - BIGQUERY_RETRY_WAIT_CONFIG_TYPE, - BIGQUERY_RETRY_WAIT_DEFAULT, - BIGQUERY_RETRY_WAIT_VALIDATOR, - BIGQUERY_RETRY_WAIT_IMPORTANCE, - BIGQUERY_RETRY_WAIT_DOC - ).define( - BIGQUERY_MESSAGE_TIME_PARTITIONING_CONFIG, - BIGQUERY_MESSAGE_TIME_PARTITIONING_CONFIG_TYPE, - BIGQUERY_MESSAGE_TIME_PARTITIONING_DEFAULT, - BIGQUERY_MESSAGE_TIME_PARTITIONING_IMPORTANCE, - BIGQUERY_MESSAGE_TIME_PARTITIONING_DOC - ).define( - BIGQUERY_PARTITION_DECORATOR_CONFIG, - BIGQUERY_PARTITION_DECORATOR_CONFIG_TYPE, - BIGQUERY_PARTITION_DECORATOR_DEFAULT, - BIGQUERY_PARTITION_DECORATOR_IMPORTANCE, - BIGQUERY_PARTITION_DECORATOR_DOC - ).define( - BIGQUERY_TIMESTAMP_PARTITION_FIELD_NAME_CONFIG, - BIGQUERY_TIMESTAMP_PARTITION_FIELD_NAME_TYPE, - BIGQUERY_TIMESTAMP_PARTITION_FIELD_NAME_DEFAULT, - BIGQUERY_TIMESTAMP_PARTITION_FIELD_NAME_IMPORTANCE, - BIGQUERY_TIMESTAMP_PARTITION_FIELD_NAME_DOC - ).define( - BIGQUERY_CLUSTERING_FIELD_NAMES_CONFIG, - BIGQUERY_CLUSTERING_FIELD_NAMES_TYPE, - BIGQUERY_CLUSTERING_FIELD_NAMES_DEFAULT, - BIGQUERY_CLUSTERING_FIELD_NAMES_IMPORTANCE, - BIGQUERY_CLUSTERING_FIELD_NAMES_DOC - ); - } - - private void checkAutoUpdateSchemas() { - Class schemaRetriever = getClass(BigQuerySinkConfig.SCHEMA_RETRIEVER_CONFIG); - - boolean autoUpdateSchemas = getBoolean(SCHEMA_UPDATE_CONFIG); - if (autoUpdateSchemas && schemaRetriever == null) { - throw new ConfigException( - "Cannot specify automatic table 
creation without a schema retriever" - ); - } - if (schemaRetriever == null) { - logger.warn( - "No schema retriever class provided; auto schema updates are impossible" - ); - } - } - - /** - * Returns the field name to use for timestamp partitioning. - * @return String that represents the field name. - */ - public Optional getTimestampPartitionFieldName() { - return Optional.ofNullable(getString(BIGQUERY_TIMESTAMP_PARTITION_FIELD_NAME_CONFIG)); - } - - /** - * Returns the field names to use for clustering. - * @return List of Strings that represent the field names. - */ - public Optional> getClusteringPartitionFieldName() { - return Optional.ofNullable(getList(BIGQUERY_CLUSTERING_FIELD_NAMES_CONFIG)); - } - - /** - * Check the validity of table partitioning configs. - */ - private void checkPartitionConfigs() { - if (getTimestampPartitionFieldName().isPresent() && getBoolean(BIGQUERY_PARTITION_DECORATOR_CONFIG)) { - throw new ConfigException( - "Only one partitioning configuration mode may be specified for the connector. " - + "Use either bigQueryPartitionDecorator OR timestampPartitionFieldName." - ); - } - } - - /** - * Check the validity of table clustering configs. - */ - private void checkClusteringConfigs() { - if (getClusteringPartitionFieldName().isPresent()) { - if (!getTimestampPartitionFieldName().isPresent() && !getBoolean(BIGQUERY_PARTITION_DECORATOR_CONFIG)) { - throw new ConfigException( - "Clustering field name may be specified only on a partitioned table." - ); - } - if (getClusteringPartitionFieldName().get().size() > 4) { - throw new ConfigException( - "You can only specify up to four clustering field names." - ); - } - } - } + public static final String GCS_BQ_TASK_CONFIG = "GCSBQTask"; + private static final ConfigDef.Type GCS_BQ_TASK_TYPE = ConfigDef.Type.BOOLEAN; + private static final boolean GCS_BQ_TASK_DEFAULT = false; + private static final ConfigDef.Importance GCS_BQ_TASK_IMPORTANCE = ConfigDef.Importance.LOW; - public static ConfigDef getConfig() { - return config; + private static ConfigDef config() { + return BigQuerySinkConfig.getConfig() + .defineInternal(GCS_BQ_TASK_CONFIG, GCS_BQ_TASK_TYPE, GCS_BQ_TASK_DEFAULT, GCS_BQ_TASK_IMPORTANCE); } /** * @param properties A Map detailing configuration properties and their respective values. */ public BigQuerySinkTaskConfig(Map properties) { - super(config, properties); - checkAutoUpdateSchemas(); - checkPartitionConfigs(); - checkClusteringConfigs(); + super(config(), properties); } } diff --git a/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/config/CredentialsValidator.java b/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/config/CredentialsValidator.java new file mode 100644 index 000000000..76007d11a --- /dev/null +++ b/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/config/CredentialsValidator.java @@ -0,0 +1,117 @@ +/* + * Copyright 2020 Confluent, Inc. + * + * This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
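For context on the new internal GCSBQTask property: a hedged sketch of the connector-side counterpart that would set it when handing out task configs (the real logic lives in BigQuerySinkConnector.taskConfigs(); the loop below is assumed for illustration):

```java
List<Map<String, String>> taskConfigs = new ArrayList<>();
for (int i = 0; i < maxTasks; i++) {
  Map<String, String> taskConfig = new HashMap<>(configProperties);
  // Exactly one task should run the periodic GCS -> BigQuery load job
  if (i == 0 && !config.getList(BigQuerySinkConfig.ENABLE_BATCH_CONFIG).isEmpty()) {
    taskConfig.put(BigQuerySinkTaskConfig.GCS_BQ_TASK_CONFIG, "true");
  }
  taskConfigs.add(taskConfig);
}
```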
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package com.wepay.kafka.connect.bigquery.config; + +import com.google.cloud.bigquery.BigQuery; +import com.google.cloud.storage.Storage; +import com.wepay.kafka.connect.bigquery.GcpClientBuilder; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.List; +import java.util.Optional; + +import static com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig.ENABLE_BATCH_CONFIG; +import static com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig.GCS_BUCKET_NAME_CONFIG; +import static com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig.KEYFILE_CONFIG; +import static com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig.KEY_SOURCE_CONFIG; +import static com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig.PROJECT_CONFIG; + +public abstract class CredentialsValidator> extends MultiPropertyValidator { + + public CredentialsValidator() { + super(KEYFILE_CONFIG); + } + + private static final Collection DEPENDENTS = Collections.unmodifiableCollection(Arrays.asList( + PROJECT_CONFIG, KEY_SOURCE_CONFIG + )); + + @Override + protected Collection dependents() { + return DEPENDENTS; + } + + @Override + protected Optional doValidate(BigQuerySinkConfig config) { + String keyFile = config.getKey(); + if (keyFile == null || keyFile.isEmpty()) { + // No credentials to validate + return Optional.empty(); + } + + try { + clientBuilder() + .withConfig(config) + .build(); + return Optional.empty(); + } catch (RuntimeException e) { + String errorMessage = "An unexpected error occurred while validating credentials for " + gcpService(); + if (e.getMessage() != null) { + errorMessage += ": " + e.getMessage(); + } + return Optional.of(errorMessage); + } + } + + protected abstract String gcpService(); + protected abstract ClientBuilder clientBuilder(); + + public static class BigQueryCredentialsValidator extends CredentialsValidator> { + @Override + public String gcpService() { + return "BigQuery"; + } + + @Override + protected GcpClientBuilder clientBuilder() { + return new GcpClientBuilder.BigQueryBuilder(); + } + } + + public static class GcsCredentialsValidator extends CredentialsValidator> { + + private static final Collection DEPENDENTS; + + static { + List dependents = new ArrayList<>(CredentialsValidator.DEPENDENTS); + dependents.add(ENABLE_BATCH_CONFIG); + dependents.add(GCS_BUCKET_NAME_CONFIG); + DEPENDENTS = Collections.unmodifiableCollection(dependents); + } + + @Override + public Collection dependents() { + return DEPENDENTS; + } + + @Override + public String gcpService() { + return "GCS"; + } + + @Override + protected GcpClientBuilder clientBuilder() { + return new GcpClientBuilder.GcsBuilder(); + } + } +} diff --git a/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/config/GcsBucketValidator.java b/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/config/GcsBucketValidator.java new file mode 100644 index 000000000..19141c0c7 --- /dev/null +++ b/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/config/GcsBucketValidator.java @@ -0,0 +1,63 @@ +/* + * Copyright 2020 Confluent, Inc. + * + * This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
diff --git a/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/config/GcsBucketValidator.java b/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/config/GcsBucketValidator.java
new file mode 100644
index 000000000..19141c0c7
--- /dev/null
+++ b/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/config/GcsBucketValidator.java
@@ -0,0 +1,63 @@
+/*
+ * Copyright 2020 Confluent, Inc.
+ *
+ * This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package com.wepay.kafka.connect.bigquery.config;
+
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.List;
+import java.util.Optional;
+
+import static com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig.ENABLE_BATCH_CONFIG;
+import static com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig.GCS_BUCKET_NAME_CONFIG;
+
+public class GcsBucketValidator extends MultiPropertyValidator<BigQuerySinkConfig> {
+
+  public GcsBucketValidator() {
+    super(GCS_BUCKET_NAME_CONFIG);
+  }
+
+  private static final Collection<String> DEPENDENTS = Collections.unmodifiableCollection(Arrays.asList(
+      ENABLE_BATCH_CONFIG
+  ));
+
+  @Override
+  protected Collection<String> dependents() {
+    return DEPENDENTS;
+  }
+
+  @Override
+  protected Optional<String> doValidate(BigQuerySinkConfig config) {
+    List<String> batchLoadedTopics = config.getList(ENABLE_BATCH_CONFIG);
+    if (batchLoadedTopics == null || batchLoadedTopics.isEmpty()) {
+      // Batch loading is disabled; no need to validate the GCS bucket
+      return Optional.empty();
+    }
+
+    String bucket = config.getString(GCS_BUCKET_NAME_CONFIG);
+    if (bucket == null || bucket.trim().isEmpty()) {
+      return Optional.of("When GCS batch loading is enabled, a bucket must be provided");
+    }
+
+    // No need to validate that the bucket exists; we create it automatically if it doesn't
+
+    return Optional.empty();
+  }
+}
+ */ + +package com.wepay.kafka.connect.bigquery.config; + +import org.apache.kafka.common.config.ConfigValue; + +import java.util.Collection; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; + +public abstract class MultiPropertyValidator { + + private final String propertyName; + + protected MultiPropertyValidator(String propertyName) { + this.propertyName = propertyName; + } + + public String propertyName() { + return propertyName; + } + + public Optional validate(ConfigValue value, Config config, Map valuesByName) { + // Only perform follow-up validation if the property doesn't already have an error associated with it + if (!value.errorMessages().isEmpty()) { + return Optional.empty(); + } + + boolean dependentsAreValid = dependents().stream() + .map(valuesByName::get) + .filter(Objects::nonNull) + .map(ConfigValue::errorMessages) + .allMatch(List::isEmpty); + // Also ensure that all of the other properties that the validation for this one depends on don't already have errors + if (!dependentsAreValid) { + return Optional.empty(); + } + + try { + return doValidate(config); + } catch (RuntimeException e) { + return Optional.of( + "An unexpected error occurred during validation" + + (e.getMessage() != null ? ": " + e.getMessage() : "") + ); + } + } + + protected abstract Collection dependents(); + protected abstract Optional doValidate(Config config); +} diff --git a/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/config/PartitioningModeValidator.java b/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/config/PartitioningModeValidator.java new file mode 100644 index 000000000..65389e5fd --- /dev/null +++ b/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/config/PartitioningModeValidator.java @@ -0,0 +1,60 @@ +/* + * Copyright 2020 Confluent, Inc. + * + * This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package com.wepay.kafka.connect.bigquery.config; + +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.Optional; + +import static com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig.BIGQUERY_PARTITION_DECORATOR_CONFIG; +import static com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig.BIGQUERY_TIMESTAMP_PARTITION_FIELD_NAME_CONFIG; + +public class PartitioningModeValidator extends MultiPropertyValidator { + public PartitioningModeValidator() { + super(BIGQUERY_PARTITION_DECORATOR_CONFIG); + } + + private static final Collection DEPENDENTS = Collections.unmodifiableCollection(Arrays.asList( + BIGQUERY_TIMESTAMP_PARTITION_FIELD_NAME_CONFIG + )); + + @Override + protected Collection dependents() { + return DEPENDENTS; + } + + @Override + protected Optional doValidate(BigQuerySinkConfig config) { + if (!config.getBoolean(BIGQUERY_PARTITION_DECORATOR_CONFIG)) { + return Optional.empty(); + } + + if (config.getTimestampPartitionFieldName().isPresent()) { + return Optional.of(String.format("Only one partitioning mode may be specified for the connector. " + + "Use either %s OR %s.", + BIGQUERY_PARTITION_DECORATOR_CONFIG, + BIGQUERY_TIMESTAMP_PARTITION_FIELD_NAME_CONFIG + )); + } else { + return Optional.empty(); + } + } +} diff --git a/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/config/SchemaRetrieverValidator.java b/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/config/SchemaRetrieverValidator.java new file mode 100644 index 000000000..9cb6a3894 --- /dev/null +++ b/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/config/SchemaRetrieverValidator.java @@ -0,0 +1,105 @@ +/* + * Copyright 2020 Confluent, Inc. + * + * This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package com.wepay.kafka.connect.bigquery.config; + +import com.wepay.kafka.connect.bigquery.api.SchemaRetriever; + +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.Optional; + +import static com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig.SCHEMA_RETRIEVER_CONFIG; +import static com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig.SCHEMA_UPDATE_CONFIG; +import static com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig.TABLE_CREATE_CONFIG; + +public abstract class SchemaRetrieverValidator extends MultiPropertyValidator { + + protected SchemaRetrieverValidator(String propertyName) { + super(propertyName); + } + + private static final Collection DEPENDENTS = Collections.unmodifiableCollection(Arrays.asList( + SCHEMA_RETRIEVER_CONFIG + )); + + @Override + protected Collection dependents() { + return DEPENDENTS; + } + + @Override + protected Optional doValidate(BigQuerySinkConfig config) { + if (!schemaRetrieverRequired(config)) { + return Optional.empty(); + } + + SchemaRetriever schemaRetriever = config.getSchemaRetriever(); + if (schemaRetriever != null) { + return Optional.empty(); + } else { + return Optional.of(missingSchemaRetrieverMessage()); + } + } + + /** + * @param config the user-provided configuration + * @return whether a schema retriever class is required for the property this validator is responsible for + */ + protected abstract boolean schemaRetrieverRequired(BigQuerySinkConfig config); + + /** + * @return an error message explaining why a schema retriever class is required for the property this validator is + * responsible for + */ + protected abstract String missingSchemaRetrieverMessage(); + + public static class TableCreationValidator extends SchemaRetrieverValidator { + public TableCreationValidator() { + super(TABLE_CREATE_CONFIG); + } + + @Override + protected boolean schemaRetrieverRequired(BigQuerySinkConfig config) { + return config.getBoolean(TABLE_CREATE_CONFIG); + } + + @Override + protected String missingSchemaRetrieverMessage() { + return "A schema retriever class is required when automatic table creation is enabled"; + } + } + + public static class SchemaUpdateValidator extends SchemaRetrieverValidator { + public SchemaUpdateValidator() { + super(SCHEMA_UPDATE_CONFIG); + } + + @Override + protected boolean schemaRetrieverRequired(BigQuerySinkConfig config) { + return config.getBoolean(SCHEMA_UPDATE_CONFIG); + } + + @Override + protected String missingSchemaRetrieverMessage() { + return "A schema retriever class is required when automatic schema updates are enabled"; + } + } +} diff --git a/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/config/TableExistenceValidator.java b/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/config/TableExistenceValidator.java new file mode 100644 index 000000000..149b20380 --- /dev/null +++ b/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/config/TableExistenceValidator.java @@ -0,0 +1,108 @@ +/* + * Copyright 2020 Confluent, Inc. + * + * This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
diff --git a/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/config/TableExistenceValidator.java b/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/config/TableExistenceValidator.java
new file mode 100644
index 000000000..149b20380
--- /dev/null
+++ b/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/config/TableExistenceValidator.java
@@ -0,0 +1,108 @@
+/*
+ * Copyright 2020 Confluent, Inc.
+ *
+ * This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package com.wepay.kafka.connect.bigquery.config;
+
+import com.google.cloud.bigquery.BigQuery;
+import com.google.cloud.bigquery.TableId;
+import com.google.common.annotations.VisibleForTesting;
+import com.wepay.kafka.connect.bigquery.GcpClientBuilder;
+import com.wepay.kafka.connect.bigquery.utils.TopicToTableResolver;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.stream.Collectors;
+
+import static com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig.DATASETS_CONFIG;
+import static com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig.KEYFILE_CONFIG;
+import static com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig.KEY_SOURCE_CONFIG;
+import static com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig.PROJECT_CONFIG;
+import static com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig.SANITIZE_TOPICS_CONFIG;
+import static com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig.TABLE_CREATE_CONFIG;
+
+public class TableExistenceValidator extends MultiPropertyValidator<BigQuerySinkConfig> {
+
+  public TableExistenceValidator() {
+    super(TABLE_CREATE_CONFIG);
+  }
+
+  private static final Collection<String> DEPENDENTS = Collections.unmodifiableCollection(Arrays.asList(
+      SANITIZE_TOPICS_CONFIG,
+      KEY_SOURCE_CONFIG,
+      KEYFILE_CONFIG,
+      PROJECT_CONFIG,
+      DATASETS_CONFIG
+  ));
+
+  @Override
+  protected Collection<String> dependents() {
+    return DEPENDENTS;
+  }
+
+  @Override
+  protected Optional<String> doValidate(BigQuerySinkConfig config) {
+    BigQuery bigQuery;
+    try {
+      bigQuery = new GcpClientBuilder.BigQueryBuilder()
+          .withConfig(config)
+          .build();
+    } catch (RuntimeException e) {
+      return Optional.of(String.format(
+          "Failed to construct BigQuery client%s",
+          e.getMessage() != null ? ": " + e.getMessage() : ""
+      ));
+    }
+
+    return doValidate(bigQuery, config);
+  }
+
+  @VisibleForTesting
+  Optional<String> doValidate(BigQuery bigQuery, BigQuerySinkConfig config) {
+    boolean autoCreateTables = config.getBoolean(TABLE_CREATE_CONFIG);
+    // No need to check if tables already exist if we're allowed to create them ourselves
+    if (autoCreateTables) {
+      return Optional.empty();
+    }
+
+    List<TableId> missingTables = missingTables(bigQuery, config);
+
+    if (missingTables.isEmpty()) {
+      return Optional.empty();
+    }
+
+    return Optional.of(String.format(
+        "Automatic table creation is disabled and the following tables do not appear to exist: %s. "
+        + "Please either manually create these tables before restarting the connector or enable automatic table "
+        + "creation by the connector.",
+        missingTables.stream().map(t -> t.getDataset() + ":" + t.getTable()).collect(Collectors.joining(", "))
+    ));
+  }
+
+  @VisibleForTesting
+  List<TableId> missingTables(BigQuery bigQuery, BigQuerySinkConfig config) {
+    return TopicToTableResolver.getTopicsToTables(config).values().stream()
+        .filter(t -> bigQuery.getTable(t) == null)
+        .collect(Collectors.toList());
+  }
+}
" + + "Please either manually create these tables before restarting the connector or enable automatic table " + + "creation by the connector.", + missingTables.stream().map(t -> t.getDataset() + ":" + t.getTable()).collect(Collectors.joining(", ")) + )); + } + + @VisibleForTesting + List missingTables(BigQuery bigQuery, BigQuerySinkConfig config) { + return TopicToTableResolver.getTopicsToTables(config).values().stream() + .filter(t -> bigQuery.getTable(t) == null) + .collect(Collectors.toList()); + } +} diff --git a/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/convert/BigQueryRecordConverter.java b/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/convert/BigQueryRecordConverter.java index 835935bd8..d5985000b 100644 --- a/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/convert/BigQueryRecordConverter.java +++ b/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/convert/BigQueryRecordConverter.java @@ -1,7 +1,7 @@ -package com.wepay.kafka.connect.bigquery.convert; - /* - * Copyright 2016 WePay, Inc. + * Copyright 2020 Confluent, Inc. + * + * This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,6 +17,7 @@ * under the License. */ +package com.wepay.kafka.connect.bigquery.convert; import com.google.cloud.bigquery.InsertAllRequest.RowToInsert; import com.wepay.kafka.connect.bigquery.api.KafkaSchemaRecordType; @@ -53,6 +54,7 @@ public class BigQueryRecordConverter implements RecordConverter()); + List fields = kafkaConnectSchema.fields().stream() .flatMap(kafkaConnectField -> convertField(kafkaConnectField.schema(), kafkaConnectField.name()) @@ -114,6 +119,35 @@ public com.google.cloud.bigquery.Schema convertSchema(Schema kafkaConnectSchema) return com.google.cloud.bigquery.Schema.of(fields); } + private void throwOnCycle(Schema kafkaConnectSchema, List seenSoFar) { + if (PRIMITIVE_TYPE_MAP.containsKey(kafkaConnectSchema.type())) { + return; + } + + if (seenSoFar.contains(kafkaConnectSchema)) { + throw new ConversionConnectException("Kafka Connect schema contains cycle"); + } + + seenSoFar.add(kafkaConnectSchema); + switch(kafkaConnectSchema.type()) { + case ARRAY: + throwOnCycle(kafkaConnectSchema.valueSchema(), seenSoFar); + break; + case MAP: + throwOnCycle(kafkaConnectSchema.keySchema(), seenSoFar); + throwOnCycle(kafkaConnectSchema.valueSchema(), seenSoFar); + break; + case STRUCT: + kafkaConnectSchema.fields().forEach(f -> throwOnCycle(f.schema(), seenSoFar)); + break; + default: + throw new ConversionConnectException( + "Unrecognized schema type: " + kafkaConnectSchema.type() + ); + } + seenSoFar.remove(seenSoFar.size() - 1); + } + private Optional convertField(Schema kafkaConnectSchema, String fieldName) { Optional result; diff --git a/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/convert/KafkaDataBuilder.java b/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/convert/KafkaDataBuilder.java index 2de1888e0..e3dba69df 100644 --- a/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/convert/KafkaDataBuilder.java +++ b/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/convert/KafkaDataBuilder.java @@ -1,3 +1,22 @@ +/* + * Copyright 2020 Confluent, Inc. + * + * This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + package com.wepay.kafka.connect.bigquery.convert; diff --git a/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/convert/RecordConverter.java b/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/convert/RecordConverter.java index 0c41b21c6..9a9a38efe 100644 --- a/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/convert/RecordConverter.java +++ b/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/convert/RecordConverter.java @@ -1,7 +1,7 @@ -package com.wepay.kafka.connect.bigquery.convert; - /* - * Copyright 2016 WePay, Inc. + * Copyright 2020 Confluent, Inc. + * + * This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,6 +17,7 @@ * under the License. */ +package com.wepay.kafka.connect.bigquery.convert; import com.wepay.kafka.connect.bigquery.api.KafkaSchemaRecordType; import org.apache.kafka.connect.sink.SinkRecord; diff --git a/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/convert/SchemaConverter.java b/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/convert/SchemaConverter.java index 985c736f8..8ca2e68b7 100644 --- a/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/convert/SchemaConverter.java +++ b/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/convert/SchemaConverter.java @@ -1,7 +1,7 @@ -package com.wepay.kafka.connect.bigquery.convert; - /* - * Copyright 2016 WePay, Inc. + * Copyright 2020 Confluent, Inc. + * + * This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,6 +17,7 @@ * under the License. */ +package com.wepay.kafka.connect.bigquery.convert; import org.apache.kafka.connect.data.Schema; diff --git a/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/convert/logicaltype/DebeziumLogicalConverters.java b/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/convert/logicaltype/DebeziumLogicalConverters.java index ab86b9ea5..4806e11c0 100644 --- a/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/convert/logicaltype/DebeziumLogicalConverters.java +++ b/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/convert/logicaltype/DebeziumLogicalConverters.java @@ -1,7 +1,7 @@ -package com.wepay.kafka.connect.bigquery.convert.logicaltype; - /* - * Copyright 2016 WePay, Inc. + * Copyright 2020 Confluent, Inc. + * + * This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,6 +17,7 @@ * under the License. 
 */
+package com.wepay.kafka.connect.bigquery.convert.logicaltype;

 import com.google.cloud.bigquery.LegacySQLTypeName;

@@ -46,6 +47,7 @@ public class DebeziumLogicalConverters {
     LogicalConverterRegistry.register(MicroTimestamp.SCHEMA_NAME, new MicroTimestampConverter());
     LogicalConverterRegistry.register(Time.SCHEMA_NAME, new TimeConverter());
     LogicalConverterRegistry.register(ZonedTimestamp.SCHEMA_NAME, new ZonedTimestampConverter());
+    LogicalConverterRegistry.register(Timestamp.SCHEMA_NAME, new TimestampConverter());
   }

   private static final int MICROS_IN_SEC = 1000000;
@@ -100,7 +102,7 @@ public String convert(Object kafkaConnectObject) {

       Long microRemainder = microTimestamp % MICROS_IN_SEC;

-      return formattedSecondsTimestamp + "." + microRemainder;
+      return formattedSecondsTimestamp + "." + String.format("%06d", microRemainder);
     }
   }

@@ -131,7 +133,7 @@ public String convert(Object kafkaConnectObject) {

       Long microRemainder = microTimestamp % MICROS_IN_SEC;

-      return formattedSecondsTimestamp + "." + microRemainder;
+      return formattedSecondsTimestamp + "." + String.format("%06d", microRemainder);
     }
   }

@@ -150,7 +152,7 @@ public TimeConverter() {

     @Override
     public String convert(Object kafkaConnectObject) {
-      java.util.Date date = new java.util.Date((Long) kafkaConnectObject);
+      java.util.Date date = new java.util.Date((Integer) kafkaConnectObject);
       return getBQTimeFormat().format(date);
     }
   }
diff --git a/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/convert/logicaltype/KafkaLogicalConverters.java b/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/convert/logicaltype/KafkaLogicalConverters.java
index 9a0176b4d..6d3685ac5 100644
--- a/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/convert/logicaltype/KafkaLogicalConverters.java
+++ b/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/convert/logicaltype/KafkaLogicalConverters.java
@@ -1,7 +1,7 @@
-package com.wepay.kafka.connect.bigquery.convert.logicaltype;
-
 /*
- * Copyright 2016 WePay, Inc.
+ * Copyright 2020 Confluent, Inc.
+ *
+ * This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -17,12 +17,14 @@
  * under the License.
  */
+package com.wepay.kafka.connect.bigquery.convert.logicaltype;

 import com.google.cloud.bigquery.LegacySQLTypeName;

 import org.apache.kafka.connect.data.Date;
 import org.apache.kafka.connect.data.Decimal;
 import org.apache.kafka.connect.data.Schema;
+import org.apache.kafka.connect.data.Time;
 import org.apache.kafka.connect.data.Timestamp;

 import java.math.BigDecimal;
@@ -36,6 +38,7 @@ public class KafkaLogicalConverters {
     LogicalConverterRegistry.register(Date.LOGICAL_NAME, new DateConverter());
     LogicalConverterRegistry.register(Decimal.LOGICAL_NAME, new DecimalConverter());
     LogicalConverterRegistry.register(Timestamp.LOGICAL_NAME, new TimestampConverter());
+    LogicalConverterRegistry.register(Time.LOGICAL_NAME, new TimeConverter());
   }

   /**
@@ -95,4 +98,24 @@ public String convert(Object kafkaConnectObject) {
       return getBqTimestampFormat().format((java.util.Date) kafkaConnectObject);
     }
   }
+
+
+  /**
+   * Class for converting Kafka time logical types to BigQuery time types.
+   */
+  public static class TimeConverter extends LogicalTypeConverter {
+    /**
+     * Create a new TimeConverter.
+ */ + public TimeConverter() { + super(Time.LOGICAL_NAME, + Schema.Type.INT32, + LegacySQLTypeName.TIME); + } + + @Override + public String convert(Object kafkaConnectObject) { + return getBqTimeFormat().format((java.util.Date) kafkaConnectObject); + } + } } diff --git a/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/convert/logicaltype/LogicalConverterRegistry.java b/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/convert/logicaltype/LogicalConverterRegistry.java index 36757de47..b21bcf613 100644 --- a/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/convert/logicaltype/LogicalConverterRegistry.java +++ b/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/convert/logicaltype/LogicalConverterRegistry.java @@ -1,7 +1,7 @@ -package com.wepay.kafka.connect.bigquery.convert.logicaltype; - /* - * Copyright 2016 WePay, Inc. + * Copyright 2020 Confluent, Inc. + * + * This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,6 +17,7 @@ * under the License. */ +package com.wepay.kafka.connect.bigquery.convert.logicaltype; import java.util.Map; import java.util.concurrent.ConcurrentHashMap; diff --git a/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/convert/logicaltype/LogicalTypeConverter.java b/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/convert/logicaltype/LogicalTypeConverter.java index 39e22167b..9adaa330b 100644 --- a/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/convert/logicaltype/LogicalTypeConverter.java +++ b/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/convert/logicaltype/LogicalTypeConverter.java @@ -1,7 +1,7 @@ -package com.wepay.kafka.connect.bigquery.convert.logicaltype; - /* - * Copyright 2016 WePay, Inc. + * Copyright 2020 Confluent, Inc. + * + * This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,6 +17,7 @@ * under the License. */ +package com.wepay.kafka.connect.bigquery.convert.logicaltype; import com.google.cloud.bigquery.LegacySQLTypeName; @@ -87,10 +88,10 @@ protected static SimpleDateFormat getBqTimestampFormat() { return bqTimestampFormat; } - protected static SimpleDateFormat getBQDatetimeFormat() { - SimpleDateFormat bqDateTimeFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS"); - bqDateTimeFormat.setTimeZone(utcTimeZone); - return bqDateTimeFormat; + protected SimpleDateFormat getBqTimeFormat() { + SimpleDateFormat bqTimestampFormat = new SimpleDateFormat("HH:mm:ss.SSS"); + bqTimestampFormat.setTimeZone(utcTimeZone); + return bqTimestampFormat; } protected static SimpleDateFormat getBQDateFormat() { diff --git a/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/exception/BigQueryConnectException.java b/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/exception/BigQueryConnectException.java index 40fefd7da..847b14542 100644 --- a/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/exception/BigQueryConnectException.java +++ b/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/exception/BigQueryConnectException.java @@ -1,7 +1,7 @@ -package com.wepay.kafka.connect.bigquery.exception; - /* - * Copyright 2016 WePay, Inc. 
+ * Copyright 2020 Confluent, Inc.
+ *
+ * This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -17,6 +17,7 @@
  * under the License.
  */
+package com.wepay.kafka.connect.bigquery.exception;

 import com.google.cloud.bigquery.BigQueryError;

@@ -52,8 +53,9 @@ private static String formatInsertAllErrors(Map<Long, List<BigQueryError>> error
     for (Map.Entry<Long, List<BigQueryError>> errorsEntry : errorsMap.entrySet()) {
       for (BigQueryError error : errorsEntry.getValue()) {
         messageBuilder.append(String.format(
-            "%n\t[row index %d]: %s: %s",
+            "%n\t[row index %d] (location %s, reason: %s): %s",
             errorsEntry.getKey(),
+            error.getLocation(),
             error.getReason(),
             error.getMessage()
         ));
@@ -61,4 +63,10 @@ private static String formatInsertAllErrors(Map<Long, List<BigQueryError>> error
     }
     return messageBuilder.toString();
   }
+
+  @Override
+  public String toString() {
+    return getCause() != null ?
+        super.toString() + "\nCaused by: " + getCause().getLocalizedMessage() : super.toString();
+  }
 }
diff --git a/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/exception/ConversionConnectException.java b/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/exception/ConversionConnectException.java
index 795ea6749..29e10bd43 100644
--- a/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/exception/ConversionConnectException.java
+++ b/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/exception/ConversionConnectException.java
@@ -1,7 +1,7 @@
-package com.wepay.kafka.connect.bigquery.exception;
-
 /*
- * Copyright 2016 WePay, Inc.
+ * Copyright 2020 Confluent, Inc.
+ *
+ * This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -17,6 +17,7 @@
  * under the License.
  */
+package com.wepay.kafka.connect.bigquery.exception;

 import org.apache.kafka.connect.errors.ConnectException;
diff --git a/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/exception/GCSConnectException.java b/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/exception/GCSConnectException.java
index 1ed2ae885..c676b50e5 100644
--- a/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/exception/GCSConnectException.java
+++ b/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/exception/GCSConnectException.java
@@ -1,7 +1,7 @@
-package com.wepay.kafka.connect.bigquery.exception;
-
 /*
- * Copyright 2016 WePay, Inc.
+ * Copyright 2020 Confluent, Inc.
+ *
+ * This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -17,6 +17,7 @@
  * under the License.
*/ +package com.wepay.kafka.connect.bigquery.exception; import org.apache.kafka.connect.errors.ConnectException; diff --git a/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/exception/SinkConfigConnectException.java b/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/exception/SinkConfigConnectException.java deleted file mode 100644 index 98a11c069..000000000 --- a/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/exception/SinkConfigConnectException.java +++ /dev/null @@ -1,39 +0,0 @@ -package com.wepay.kafka.connect.bigquery.exception; - -/* - * Copyright 2016 WePay, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - - -import org.apache.kafka.connect.errors.ConnectException; - -/** - * Class for exceptions that occur while attempting to process configuration files, including both - * formatting and logical errors. - */ -public class SinkConfigConnectException extends ConnectException { - public SinkConfigConnectException(String msg) { - super(msg); - } - - public SinkConfigConnectException(String msg, Throwable thr) { - super(msg, thr); - } - - public SinkConfigConnectException(Throwable thr) { - super(thr); - } -} diff --git a/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/retrieve/MemorySchemaRetriever.java b/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/retrieve/MemorySchemaRetriever.java index 9e89fa114..c0d2526a1 100644 --- a/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/retrieve/MemorySchemaRetriever.java +++ b/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/retrieve/MemorySchemaRetriever.java @@ -1,3 +1,22 @@ +/* + * Copyright 2020 Confluent, Inc. + * + * This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + package com.wepay.kafka.connect.bigquery.retrieve; import com.google.cloud.bigquery.TableId; diff --git a/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/utils/FieldNameSanitizer.java b/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/utils/FieldNameSanitizer.java index 09aeb70c2..c999b08ab 100644 --- a/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/utils/FieldNameSanitizer.java +++ b/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/utils/FieldNameSanitizer.java @@ -1,7 +1,26 @@ +/* + * Copyright 2020 Confluent, Inc. 
+ *
+ * This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 package com.wepay.kafka.connect.bigquery.utils;

+import java.util.HashMap;
 import java.util.Map;
-import java.util.stream.Collectors;

 public class FieldNameSanitizer {

@@ -20,15 +39,17 @@ public static String sanitizeName(String name) {
   // letters, numbers, and underscores.
   // Note: a.b and a/b will have the same value after sanitization which will cause Duplicate key
   // Exception.
+  @SuppressWarnings("unchecked")
   public static Map<String, Object> replaceInvalidKeys(Map<String, Object> map) {
-    return map.entrySet().stream().collect(Collectors.toMap(
-        (entry) -> sanitizeName(entry.getKey()),
-        (entry) -> {
-          if (entry.getValue() instanceof Map) {
-            return replaceInvalidKeys((Map<String, Object>) entry.getValue());
-          }
-          return entry.getValue();
-        }
-    ));
+    Map<String, Object> result = new HashMap<>();
+    map.forEach((key, value) -> {
+      String sanitizedKey = sanitizeName(key);
+      if (value instanceof Map) {
+        result.put(sanitizedKey, replaceInvalidKeys((Map<String, Object>) value));
+      } else {
+        result.put(sanitizedKey, value);
+      }
+    });
+    return result;
   }
 }
diff --git a/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/utils/PartitionedTableId.java b/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/utils/PartitionedTableId.java
index 4f3e3ee48..28ae9b602 100644
--- a/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/utils/PartitionedTableId.java
+++ b/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/utils/PartitionedTableId.java
@@ -1,7 +1,7 @@
-package com.wepay.kafka.connect.bigquery.utils;
-
 /*
- * Copyright 2016 WePay, Inc.
+ * Copyright 2020 Confluent, Inc.
+ *
+ * This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -17,6 +17,7 @@
  * under the License.
  */
+package com.wepay.kafka.connect.bigquery.utils;

 import com.google.cloud.bigquery.TableId;
diff --git a/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/utils/TopicToTableResolver.java b/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/utils/TopicToTableResolver.java
index 959335288..fcd984000 100644
--- a/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/utils/TopicToTableResolver.java
+++ b/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/utils/TopicToTableResolver.java
@@ -1,7 +1,7 @@
-package com.wepay.kafka.connect.bigquery.utils;
-
 /*
- * Copyright 2016 WePay, Inc.
+ * Copyright 2020 Confluent, Inc.
+ *
+ * This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -17,6 +17,7 @@
  * under the License.
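One behavioral consequence of the `FieldNameSanitizer` rewrite above, observed from the code rather than documented: keys that sanitize to the same name now silently collapse instead of throwing from `Collectors.toMap`:

```java
import com.wepay.kafka.connect.bigquery.utils.FieldNameSanitizer;

import java.util.HashMap;
import java.util.Map;

// With an explicit HashMap, duplicate sanitized keys overwrite (last write wins)
// rather than triggering the IllegalStateException that Collectors.toMap throws.
public class SanitizerSketch {
  public static void main(String[] args) {
    Map<String, Object> record = new HashMap<>();
    record.put("a.b", 1);
    record.put("a/b", 2);
    // Both keys sanitize to "a_b"; one of the two values wins.
    System.out.println(FieldNameSanitizer.replaceInvalidKeys(record)); // {a_b=1} or {a_b=2}
  }
}
```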
*/ +package com.wepay.kafka.connect.bigquery.utils; import com.google.cloud.bigquery.TableId; diff --git a/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/utils/Version.java b/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/utils/Version.java index bbcdfae38..8a6c6f4be 100644 --- a/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/utils/Version.java +++ b/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/utils/Version.java @@ -1,7 +1,7 @@ -package com.wepay.kafka.connect.bigquery.utils; - /* - * Copyright 2016 WePay, Inc. + * Copyright 2020 Confluent, Inc. + * + * This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,6 +17,7 @@ * under the License. */ +package com.wepay.kafka.connect.bigquery.utils; /** * Utility class for unifying the version of a project. All other references to version number diff --git a/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/write/batch/CountDownRunnable.java b/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/write/batch/CountDownRunnable.java index cbdca4ea6..70edc8a1a 100644 --- a/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/write/batch/CountDownRunnable.java +++ b/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/write/batch/CountDownRunnable.java @@ -1,7 +1,7 @@ -package com.wepay.kafka.connect.bigquery.write.batch; - /* - * Copyright 2016 WePay, Inc. + * Copyright 2020 Confluent, Inc. + * + * This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,6 +17,7 @@ * under the License. */ +package com.wepay.kafka.connect.bigquery.write.batch; import org.apache.kafka.connect.errors.ConnectException; diff --git a/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/write/batch/GCSBatchTableWriter.java b/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/write/batch/GCSBatchTableWriter.java index 48a9512a6..6e91d99ad 100644 --- a/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/write/batch/GCSBatchTableWriter.java +++ b/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/write/batch/GCSBatchTableWriter.java @@ -1,7 +1,7 @@ -package com.wepay.kafka.connect.bigquery.write.batch; - /* - * Copyright 2016 WePay, Inc. + * Copyright 2020 Confluent, Inc. + * + * This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,6 +17,7 @@ * under the License. 
*/ +package com.wepay.kafka.connect.bigquery.write.batch; import com.google.cloud.bigquery.InsertAllRequest.RowToInsert; import com.google.cloud.bigquery.TableId; diff --git a/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/write/batch/KCBQThreadPoolExecutor.java b/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/write/batch/KCBQThreadPoolExecutor.java index 637ac2ea4..491d9b0a1 100644 --- a/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/write/batch/KCBQThreadPoolExecutor.java +++ b/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/write/batch/KCBQThreadPoolExecutor.java @@ -1,7 +1,7 @@ -package com.wepay.kafka.connect.bigquery.write.batch; - /* - * Copyright 2016 WePay, Inc. + * Copyright 2020 Confluent, Inc. + * + * This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,21 +17,20 @@ * under the License. */ +package com.wepay.kafka.connect.bigquery.write.batch; import com.wepay.kafka.connect.bigquery.config.BigQuerySinkTaskConfig; import com.wepay.kafka.connect.bigquery.exception.BigQueryConnectException; +import org.apache.kafka.connect.errors.ConnectException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.util.ArrayList; -import java.util.Collection; -import java.util.List; +import java.util.Optional; import java.util.concurrent.BlockingQueue; -import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.CountDownLatch; import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; -import java.util.stream.Collectors; +import java.util.concurrent.atomic.AtomicReference; /** * ThreadPoolExecutor for writing Rows to BigQuery. 
@@ -43,9 +42,7 @@ public class KCBQThreadPoolExecutor extends ThreadPoolExecutor {

   private static final Logger logger = LoggerFactory.getLogger(KCBQThreadPoolExecutor.class);

-
-  private ConcurrentHashMap.KeySetView<Throwable, Boolean> encounteredErrors =
-      ConcurrentHashMap.newKeySet();
+  private final AtomicReference<Throwable> encounteredError = new AtomicReference<>();

   /**
    * @param config the {@link BigQuerySinkTaskConfig}
@@ -65,11 +62,10 @@ protected void afterExecute(Runnable runnable, Throwable throwable) {
     super.afterExecute(runnable, throwable);

     if (throwable != null) {
-      logger.error("Task failed with {} error: {}",
-          throwable.getClass().getName(),
-          throwable.getMessage());
-      logger.debug("Error Task Stacktrace:", throwable);
-      encounteredErrors.add(throwable);
+      // Log at debug level since this will be shown to the user at error level by the Connect framework if it causes
+      // the task to fail, and will otherwise just pollute logs and potentially mislead users
+      logger.debug("A write thread has failed with an unrecoverable error", throwable);
+      encounteredError.compareAndSet(null, throwable);
     }
   }

@@ -91,19 +87,18 @@ public void awaitCurrentTasks() throws InterruptedException, BigQueryConnectExce
       execute(new CountDownRunnable(countDownLatch));
     }
     countDownLatch.await();
-    if (encounteredErrors.size() > 0) {
-      String errorString = createErrorString(encounteredErrors);
-      encounteredErrors.clear();
-      throw new BigQueryConnectException("Some write threads encountered unrecoverable errors: "
-          + errorString + "; See logs for more detail");
-    }
+    maybeThrowEncounteredError();
   }

-  private static String createErrorString(Collection<Throwable> errors) {
-    List<String> exceptionTypeStrings = new ArrayList<>(errors.size());
-    exceptionTypeStrings.addAll(errors.stream()
-        .map(throwable -> throwable.getClass().getName())
-        .collect(Collectors.toList()));
-    return String.join(", ", exceptionTypeStrings);
+  /**
+   * Immediately throw an exception if any unrecoverable errors were encountered by any of the write
+   * tasks.
+   *
+   * @throws BigQueryConnectException if any of the tasks failed.
+   */
+  public void maybeThrowEncounteredError() {
+    Optional.ofNullable(encounteredError.get()).ifPresent(t -> {
+      throw new BigQueryConnectException("A write thread has failed with an unrecoverable error", t);
+    });
   }
 }
diff --git a/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/write/batch/TableWriter.java b/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/write/batch/TableWriter.java
index 0f418bed0..53f49e895 100644
--- a/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/write/batch/TableWriter.java
+++ b/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/write/batch/TableWriter.java
@@ -1,7 +1,7 @@
-package com.wepay.kafka.connect.bigquery.write.batch;
-
 /*
- * Copyright 2016 WePay, Inc.
+ * Copyright 2020 Confluent, Inc.
+ *
+ * This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -17,16 +17,17 @@
  * under the License.
 */
+package com.wepay.kafka.connect.bigquery.write.batch;

 import com.google.cloud.bigquery.BigQueryException;
 import com.google.cloud.bigquery.InsertAllRequest.RowToInsert;

 import com.wepay.kafka.connect.bigquery.convert.RecordConverter;
+import com.wepay.kafka.connect.bigquery.exception.BigQueryConnectException;
 import com.wepay.kafka.connect.bigquery.utils.PartitionedTableId;
 import com.wepay.kafka.connect.bigquery.write.row.BigQueryWriter;

 import org.apache.kafka.connect.errors.ConnectException;
-import org.apache.kafka.connect.sink.SinkRecord;

 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

@@ -43,6 +44,7 @@ public class TableWriter implements Runnable {

   private static final int BAD_REQUEST_CODE = 400;
   private static final String INVALID_REASON = "invalid";
+  private static final String PAYLOAD_TOO_LARGE_REASON = "Request payload size exceeds the limit:";

   private final BigQueryWriter writer;
   private final PartitionedTableId table;
@@ -84,7 +86,10 @@ public void run() {
         logger.warn("Could not write batch of size {} to BigQuery.", currentBatch.size(), err);
         if (isBatchSizeError(err)) {
           failureCount++;
-          currentBatchSize = getNewBatchSize(currentBatchSize);
+          currentBatchSize = getNewBatchSize(currentBatchSize, err);
+        } else {
+          // Throw exception on write errors such as 403.
+          throw new BigQueryConnectException("Failed to write to table", err);
         }
       }
     }
@@ -104,10 +109,26 @@ public void run() {
   }

-  private static int getNewBatchSize(int currentBatchSize) {
+  private static int getNewBatchSize(int currentBatchSize, Throwable err) {
     if (currentBatchSize == 1) {
-      // todo correct exception type?
-      throw new ConnectException("Attempted to reduce batch size below 1.");
+      logger.error("Attempted to reduce batch size below 1");
+      throw new BigQueryConnectException(
+          "Failed to write to BigQuery even after reducing batch size to 1 row at a time. "
+          + "This can indicate an error in the connector's logic for classifying BigQuery errors, as non-retriable "
+          + "errors may be treated as retriable. "
+          + "If that appears to be the case, please report the issue to the project's maintainers and include the "
+          + "complete stack trace for this error as it appears in the logs. "
+          + "Alternatively, there may be a record that the connector has read from Kafka that is too large to "
+          + "write to BigQuery using the streaming insert API, which cannot be addressed with a change to the "
+          + "connector and will need to be handled externally by optionally writing the record to BigQuery using "
+          + "another means and then reconfiguring the connector to skip the record. "
+          + "Finally, streaming insert quotas for BigQuery may be causing insertion failures for the connector; "
+          + "in that case, please ensure that quotas for maximum rows per second, maximum bytes per second, etc. "
+          + "are being respected before restarting the connector. "
+          + "The cause of this exception is the error encountered from BigQuery after the last attempt to write a "
+          + "batch was made.",
+          err
+      );
     }
     // round batch size up so we don't end up with a dangling 1 row at the end.
     return (int) Math.ceil(currentBatchSize / 2.0);
@@ -137,6 +158,10 @@ private static boolean isBatchSizeError(BigQueryException exception) {
        * todo distinguish this from other invalids (like invalid table schema).
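The ceiling-halving above produces a short, predictable retry ladder; for example, a failing batch of 10 rows is retried at sizes 5, 3, 2, and 1 before the connector gives up with the long error message:

```java
// Retry sizes produced by the ceiling-halving strategy, starting from 10 rows.
public class BatchHalvingSketch {
  public static void main(String[] args) {
    int batchSize = 10;
    while (batchSize > 1) {
      batchSize = (int) Math.ceil(batchSize / 2.0);
      System.out.println(batchSize); // prints 5, 3, 2, 1
    }
  }
}
```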
*/ return true; + } else if (exception.getCode() == BAD_REQUEST_CODE + && exception.getMessage() != null + && exception.getMessage().contains(PAYLOAD_TOO_LARGE_REASON)) { + return true; } return false; } diff --git a/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/write/batch/TableWriterBuilder.java b/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/write/batch/TableWriterBuilder.java index 12bdd4c16..9051fcbf1 100644 --- a/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/write/batch/TableWriterBuilder.java +++ b/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/write/batch/TableWriterBuilder.java @@ -1,7 +1,7 @@ -package com.wepay.kafka.connect.bigquery.write.batch; - /* - * Copyright 2016 WePay, Inc. + * Copyright 2020 Confluent, Inc. + * + * This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,6 +17,7 @@ * under the License. */ +package com.wepay.kafka.connect.bigquery.write.batch; import com.google.cloud.bigquery.InsertAllRequest.RowToInsert; diff --git a/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/write/row/AdaptiveBigQueryWriter.java b/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/write/row/AdaptiveBigQueryWriter.java index 2b64085d5..68879f78d 100644 --- a/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/write/row/AdaptiveBigQueryWriter.java +++ b/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/write/row/AdaptiveBigQueryWriter.java @@ -1,7 +1,7 @@ -package com.wepay.kafka.connect.bigquery.write.row; - /* - * Copyright 2016 WePay, Inc. + * Copyright 2020 Confluent, Inc. + * + * This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,6 +17,7 @@ * under the License. */ +package com.wepay.kafka.connect.bigquery.write.row; import com.google.cloud.bigquery.BigQuery; import com.google.cloud.bigquery.TableId; diff --git a/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/write/row/BigQueryWriter.java b/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/write/row/BigQueryWriter.java index d766b1b2a..6210e6c0a 100644 --- a/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/write/row/BigQueryWriter.java +++ b/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/write/row/BigQueryWriter.java @@ -1,7 +1,7 @@ -package com.wepay.kafka.connect.bigquery.write.row; - /* - * Copyright 2016 WePay, Inc. + * Copyright 2020 Confluent, Inc. + * + * This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,6 +17,7 @@ * under the License. 
*/ +package com.wepay.kafka.connect.bigquery.write.row; import com.google.cloud.bigquery.BigQueryError; import com.google.cloud.bigquery.BigQueryException; diff --git a/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/write/row/GCSToBQWriter.java b/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/write/row/GCSToBQWriter.java index d7313919d..d012481af 100644 --- a/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/write/row/GCSToBQWriter.java +++ b/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/write/row/GCSToBQWriter.java @@ -1,7 +1,7 @@ -package com.wepay.kafka.connect.bigquery.write.row; - /* - * Copyright 2016 WePay, Inc. + * Copyright 2020 Confluent, Inc. + * + * This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,6 +17,7 @@ * under the License. */ +package com.wepay.kafka.connect.bigquery.write.row; import com.google.cloud.bigquery.BigQuery; import com.google.cloud.bigquery.BigQueryException; diff --git a/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/write/row/SimpleBigQueryWriter.java b/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/write/row/SimpleBigQueryWriter.java index b1189473a..8d22af876 100644 --- a/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/write/row/SimpleBigQueryWriter.java +++ b/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/write/row/SimpleBigQueryWriter.java @@ -1,7 +1,7 @@ -package com.wepay.kafka.connect.bigquery.write.row; - /* - * Copyright 2016 WePay, Inc. + * Copyright 2020 Confluent, Inc. + * + * This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,6 +17,7 @@ * under the License. */ +package com.wepay.kafka.connect.bigquery.write.row; import com.google.cloud.bigquery.BigQuery; import com.google.cloud.bigquery.BigQueryError; diff --git a/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/BigQuerySinkConnectorTest.java b/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/BigQuerySinkConnectorTest.java index bbe9bc854..06d182694 100644 --- a/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/BigQuerySinkConnectorTest.java +++ b/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/BigQuerySinkConnectorTest.java @@ -1,7 +1,7 @@ -package com.wepay.kafka.connect.bigquery; - /* - * Copyright 2016 WePay, Inc. + * Copyright 2020 Confluent, Inc. + * + * This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,33 +17,15 @@ * under the License. 
*/ - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertNotSame; - -import static org.mockito.Matchers.any; - -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.never; -import static org.mockito.Mockito.verify; -import static org.mockito.Mockito.when; +package com.wepay.kafka.connect.bigquery; import com.google.cloud.bigquery.BigQuery; import com.google.cloud.bigquery.Table; import com.google.cloud.bigquery.TableId; - import com.wepay.kafka.connect.bigquery.api.KafkaSchemaRecordType; import com.wepay.kafka.connect.bigquery.api.SchemaRetriever; - import com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig; - -import com.wepay.kafka.connect.bigquery.exception.BigQueryConnectException; -import com.wepay.kafka.connect.bigquery.exception.SinkConfigConnectException; -import org.apache.kafka.common.config.ConfigException; - import org.apache.kafka.connect.data.Schema; - import org.junit.BeforeClass; import org.junit.Test; @@ -51,30 +33,19 @@ import java.util.List; import java.util.Map; -public class BigQuerySinkConnectorTest { - private static SinkConnectorPropertiesFactory propertiesFactory; - - // Would just use Mockito, but can't provide the name of an anonymous class to the config file - public static class MockSchemaRetriever implements SchemaRetriever { - @Override - public void configure(Map properties) { - // Shouldn't be called - } - - @Override - public Schema retrieveSchema(TableId table, String topic, KafkaSchemaRecordType schemaType) { - // Shouldn't be called - return null; - } +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNotSame; +import static org.mockito.Matchers.any; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; - @Override - public void setLastSeenSchema(TableId table, String topic, Schema schema) { - } - } +public class BigQuerySinkConnectorTest { + private static SinkPropertiesFactory propertiesFactory; @BeforeClass public static void initializePropertiesFactory() { - propertiesFactory = new SinkConnectorPropertiesFactory(); + propertiesFactory = new SinkPropertiesFactory(); } @Test @@ -86,15 +57,10 @@ public void testTaskClass() { public void testTaskConfigs() { Map properties = propertiesFactory.getProperties(); - Table fakeTable = mock(Table.class); - - BigQuery bigQuery = mock(BigQuery.class); - when(bigQuery.getTable(any(TableId.class))).thenReturn(fakeTable); + BigQuerySinkConnector testConnector = new BigQuerySinkConnector(); - SchemaManager schemaManager = mock(SchemaManager.class); - BigQuerySinkConnector testConnector = new BigQuerySinkConnector(bigQuery, schemaManager); - - testConnector.start(properties); + testConnector.configProperties = properties; + testConnector.config = new BigQuerySinkConfig(properties); for (int i : new int[] { 1, 2, 10, 100 }) { Map expectedProperties = new HashMap<>(properties); @@ -126,20 +92,7 @@ public void testTaskConfigs() { @Test public void testConfig() { - assertEquals(BigQuerySinkConfig.getConfig(), new BigQuerySinkConnector().config()); - } - - // Make sure that a config exception is properly translated into a SinkConfigConnectException - @Test(expected = SinkConfigConnectException.class) - public void testConfigException() { - try { - Map badProperties = propertiesFactory.getProperties(); - badProperties.remove(BigQuerySinkConfig.TOPICS_CONFIG); - BigQuerySinkConfig.validate(badProperties); - new 
BigQuerySinkConnector().start(badProperties); - } catch (ConfigException e) { - throw new SinkConfigConnectException(e); - } + assertNotNull(new BigQuerySinkConnector().config()); } @Test diff --git a/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/BigQuerySinkTaskTest.java b/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/BigQuerySinkTaskTest.java index af6547568..e5224e32d 100644 --- a/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/BigQuerySinkTaskTest.java +++ b/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/BigQuerySinkTaskTest.java @@ -1,7 +1,7 @@ -package com.wepay.kafka.connect.bigquery; - /* - * Copyright 2016 WePay, Inc. + * Copyright 2020 Confluent, Inc. + * + * This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,6 +17,7 @@ * under the License. */ +package com.wepay.kafka.connect.bigquery; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; @@ -38,10 +39,7 @@ import com.wepay.kafka.connect.bigquery.api.SchemaRetriever; import com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig; -import com.wepay.kafka.connect.bigquery.config.BigQuerySinkTaskConfig; import com.wepay.kafka.connect.bigquery.exception.BigQueryConnectException; -import com.wepay.kafka.connect.bigquery.exception.SinkConfigConnectException; -import org.apache.kafka.common.config.ConfigException; import org.apache.kafka.common.record.TimestampType; import org.apache.kafka.connect.data.Schema; @@ -62,11 +60,11 @@ import java.util.concurrent.RejectedExecutionException; public class BigQuerySinkTaskTest { - private static SinkTaskPropertiesFactory propertiesFactory; + private static SinkPropertiesFactory propertiesFactory; @BeforeClass public static void initializePropertiesFactory() { - propertiesFactory = new SinkTaskPropertiesFactory(); + propertiesFactory = new SinkPropertiesFactory(); } @Test @@ -176,7 +174,7 @@ public void testPutWhenPartitioningOnMessageTime() { Map properties = propertiesFactory.getProperties(); properties.put(BigQuerySinkConfig.TOPICS_CONFIG, topic); properties.put(BigQuerySinkConfig.DATASETS_CONFIG, ".*=scratch"); - properties.put(BigQuerySinkTaskConfig.BIGQUERY_MESSAGE_TIME_PARTITIONING_CONFIG, "true"); + properties.put(BigQuerySinkConfig.BIGQUERY_MESSAGE_TIME_PARTITIONING_CONFIG, "true"); BigQuery bigQuery = mock(BigQuery.class); Storage storage = mock(Storage.class); @@ -209,8 +207,8 @@ public void testPutWhenPartitioningIsSetToTrue() { Map properties = propertiesFactory.getProperties(); properties.put(BigQuerySinkConfig.TOPICS_CONFIG, topic); properties.put(BigQuerySinkConfig.DATASETS_CONFIG, ".*=scratch"); - properties.put(BigQuerySinkTaskConfig.BIGQUERY_PARTITION_DECORATOR_CONFIG, "true"); - properties.put(BigQuerySinkTaskConfig.BIGQUERY_MESSAGE_TIME_PARTITIONING_CONFIG, "true"); + properties.put(BigQuerySinkConfig.BIGQUERY_PARTITION_DECORATOR_CONFIG, "true"); + properties.put(BigQuerySinkConfig.BIGQUERY_MESSAGE_TIME_PARTITIONING_CONFIG, "true"); BigQuery bigQuery = mock(BigQuery.class); Storage storage = mock(Storage.class); @@ -243,7 +241,7 @@ public void testPutWhenPartitioningIsSetToFalse() { Map properties = propertiesFactory.getProperties(); properties.put(BigQuerySinkConfig.TOPICS_CONFIG, topic); properties.put(BigQuerySinkConfig.DATASETS_CONFIG, ".*=scratch"); - 
properties.put(BigQuerySinkTaskConfig.BIGQUERY_PARTITION_DECORATOR_CONFIG, "false"); + properties.put(BigQuerySinkConfig.BIGQUERY_PARTITION_DECORATOR_CONFIG, "false"); BigQuery bigQuery = mock(BigQuery.class); Storage storage = mock(Storage.class); @@ -277,7 +275,7 @@ public void testPutWhenPartitioningOnMessageTimeWhenNoTimestampType() { Map<String, String> properties = propertiesFactory.getProperties(); properties.put(BigQuerySinkConfig.TOPICS_CONFIG, topic); properties.put(BigQuerySinkConfig.DATASETS_CONFIG, ".*=scratch"); - properties.put(BigQuerySinkTaskConfig.BIGQUERY_MESSAGE_TIME_PARTITIONING_CONFIG, "true"); + properties.put(BigQuerySinkConfig.BIGQUERY_MESSAGE_TIME_PARTITIONING_CONFIG, "true"); BigQuery bigQuery = mock(BigQuery.class); Storage storage = mock(Storage.class); @@ -298,11 +296,43 @@ public void testPutWhenPartitioningOnMessageTimeWhenNoTimestampType() { TimestampType.NO_TIMESTAMP_TYPE, null))); } - // It's important that the buffer be completely wiped after a call to flush, since any exception - thrown during flush causes Kafka Connect to not commit the offsets for any records sent to the - task since the last flush - @Test - public void testBufferClearOnFlushError() { + @Test(expected = BigQueryConnectException.class, timeout = 60000L) + public void testSimplePutException() throws InterruptedException { + final String topic = "test-topic"; + Map<String, String> properties = propertiesFactory.getProperties(); + properties.put(BigQuerySinkConfig.TOPICS_CONFIG, topic); + properties.put(BigQuerySinkConfig.DATASETS_CONFIG, ".*=scratch"); + + BigQuery bigQuery = mock(BigQuery.class); + Storage storage = mock(Storage.class); + + SinkTaskContext sinkTaskContext = mock(SinkTaskContext.class); + InsertAllResponse insertAllResponse = mock(InsertAllResponse.class); + when(bigQuery.insertAll(any())).thenReturn(insertAllResponse); + when(insertAllResponse.hasErrors()).thenReturn(true); + when(insertAllResponse.getInsertErrors()).thenReturn(Collections.singletonMap( + 0L, Collections.singletonList(new BigQueryError("no such field", "us-central1", "")))); + + SchemaRetriever schemaRetriever = mock(SchemaRetriever.class); + SchemaManager schemaManager = mock(SchemaManager.class); + + BigQuerySinkTask testTask = new BigQuerySinkTask(bigQuery, schemaRetriever, storage, schemaManager); + testTask.initialize(sinkTaskContext); + testTask.start(properties); + + testTask.put(Collections.singletonList(spoofSinkRecord(topic))); + while (true) { + Thread.sleep(100); + testTask.put(Collections.emptyList()); + } + } + + + // Any exception thrown during flush causes Kafka Connect to not commit the offsets for any + // records sent to the task since the last flush, so the task should fail and the next flush should + // also throw an error.
+ @Test(expected = BigQueryConnectException.class) + public void testFlushException() { final String dataset = "scratch"; final String topic = "test_topic"; @@ -356,8 +386,8 @@ public void testBigQuery5XXRetry() { final String dataset = "scratch"; Map properties = propertiesFactory.getProperties(); - properties.put(BigQuerySinkTaskConfig.BIGQUERY_RETRY_CONFIG, "3"); - properties.put(BigQuerySinkTaskConfig.BIGQUERY_RETRY_WAIT_CONFIG, "2000"); + properties.put(BigQuerySinkConfig.BIGQUERY_RETRY_CONFIG, "3"); + properties.put(BigQuerySinkConfig.BIGQUERY_RETRY_WAIT_CONFIG, "2000"); properties.put(BigQuerySinkConfig.TOPICS_CONFIG, topic); properties.put(BigQuerySinkConfig.DATASETS_CONFIG, String.format(".*=%s", dataset)); @@ -392,8 +422,8 @@ public void testBigQuery403Retry() { final String dataset = "scratch"; Map properties = propertiesFactory.getProperties(); - properties.put(BigQuerySinkTaskConfig.BIGQUERY_RETRY_CONFIG, "2"); - properties.put(BigQuerySinkTaskConfig.BIGQUERY_RETRY_WAIT_CONFIG, "2000"); + properties.put(BigQuerySinkConfig.BIGQUERY_RETRY_CONFIG, "2"); + properties.put(BigQuerySinkConfig.BIGQUERY_RETRY_WAIT_CONFIG, "2000"); properties.put(BigQuerySinkConfig.TOPICS_CONFIG, topic); properties.put(BigQuerySinkConfig.DATASETS_CONFIG, String.format(".*=%s", dataset)); @@ -429,8 +459,8 @@ public void testBigQueryRetryExceeded() { final String dataset = "scratch"; Map properties = propertiesFactory.getProperties(); - properties.put(BigQuerySinkTaskConfig.BIGQUERY_RETRY_CONFIG, "1"); - properties.put(BigQuerySinkTaskConfig.BIGQUERY_RETRY_WAIT_CONFIG, "2000"); + properties.put(BigQuerySinkConfig.BIGQUERY_RETRY_CONFIG, "1"); + properties.put(BigQuerySinkConfig.BIGQUERY_RETRY_WAIT_CONFIG, "2000"); properties.put(BigQuerySinkConfig.TOPICS_CONFIG, topic); properties.put(BigQuerySinkConfig.DATASETS_CONFIG, String.format(".*=%s", dataset)); @@ -488,25 +518,6 @@ public void testInterruptedException() { testTask.flush(Collections.emptyMap()); } - // Make sure that a ConfigException is properly translated into a SinkConfigConnectException - @Test(expected = SinkConfigConnectException.class) - public void testConfigException() { - try { - Map badProperties = propertiesFactory.getProperties(); - badProperties.remove(BigQuerySinkConfig.TOPICS_CONFIG); - BigQuerySinkConfig.validate(badProperties); - - SchemaRetriever schemaRetriever = mock(SchemaRetriever.class); - SchemaManager schemaManager = mock(SchemaManager.class); - - BigQuerySinkTask testTask = - new BigQuerySinkTask(mock(BigQuery.class), schemaRetriever, mock(Storage.class), schemaManager); - testTask.start(badProperties); - } catch (ConfigException e) { - throw new SinkConfigConnectException(e); - } - } - @Test public void testVersion() { assertNotNull(new BigQuerySinkTask().version()); diff --git a/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/GCSToBQLoadRunnableTest.java b/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/GCSToBQLoadRunnableTest.java index 38c2ef30a..af4fb5f48 100644 --- a/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/GCSToBQLoadRunnableTest.java +++ b/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/GCSToBQLoadRunnableTest.java @@ -1,7 +1,7 @@ -package com.wepay.kafka.connect.bigquery; - /* - * Copyright 2018 WePay, Inc. + * Copyright 2020 Confluent, Inc. + * + * This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,6 +17,7 @@ * under the License. */ +package com.wepay.kafka.connect.bigquery; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNull; diff --git a/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/SchemaManagerTest.java b/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/SchemaManagerTest.java index 873bf4cb2..22406110e 100644 --- a/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/SchemaManagerTest.java +++ b/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/SchemaManagerTest.java @@ -1,7 +1,7 @@ -package com.wepay.kafka.connect.bigquery; - /* - * Copyright 2016 WePay, Inc. + * Copyright 2020 Confluent, Inc. + * + * This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,6 +17,7 @@ * under the License. */ +package com.wepay.kafka.connect.bigquery; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; diff --git a/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/SinkConnectorPropertiesFactory.java b/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/SinkConnectorPropertiesFactory.java deleted file mode 100644 index e47dd8ac2..000000000 --- a/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/SinkConnectorPropertiesFactory.java +++ /dev/null @@ -1,45 +0,0 @@ -package com.wepay.kafka.connect.bigquery; - -/* - * Copyright 2016 WePay, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - - -import com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig; - -import java.util.Map; - -public class SinkConnectorPropertiesFactory extends SinkPropertiesFactory { - @Override - public Map getProperties() { - Map properties = super.getProperties(); - - properties.put(BigQuerySinkConfig.TABLE_CREATE_CONFIG, "false"); - return properties; - } - - /** - * Make sure that each of the default configuration properties work nicely with the given - * configuration object. - * - * @param config The config object to test - */ - public void testProperties(BigQuerySinkConfig config) { - super.testProperties(config); - - config.getBoolean(config.TABLE_CREATE_CONFIG); - } -} diff --git a/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/SinkPropertiesFactory.java b/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/SinkPropertiesFactory.java index 61e14530b..93db65926 100644 --- a/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/SinkPropertiesFactory.java +++ b/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/SinkPropertiesFactory.java @@ -1,7 +1,7 @@ -package com.wepay.kafka.connect.bigquery; - /* - * Copyright 2016 WePay, Inc. + * Copyright 2020 Confluent, Inc. 
+ * + * This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,6 +17,7 @@ * under the License. */ +package com.wepay.kafka.connect.bigquery; import com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig; @@ -34,9 +35,9 @@ public Map getProperties() { Map properties = new HashMap<>(); properties.put(BigQuerySinkConfig.TABLE_CREATE_CONFIG, "false"); + properties.put(BigQuerySinkConfig.SCHEMA_UPDATE_CONFIG, "false"); properties.put(BigQuerySinkConfig.TOPICS_CONFIG, "kcbq-test"); properties.put(BigQuerySinkConfig.PROJECT_CONFIG, "test-project"); - properties.put(BigQuerySinkConfig.DATASETS_CONFIG, ".*=test"); properties.put(BigQuerySinkConfig.DATASETS_CONFIG, "kcbq-test=kcbq-test-table"); properties.put(BigQuerySinkConfig.KEYFILE_CONFIG, "key.json"); @@ -46,27 +47,4 @@ public Map getProperties() { return properties; } - - /** - * Make sure that each of the default configuration properties work nicely with the given - * configuration object. - * - * @param config The config object to test - */ - public void testProperties(BigQuerySinkConfig config) { - config.getTopicsToDatasets(); - - config.getMap(config.DATASETS_CONFIG); - config.getMap(config.TOPICS_TO_TABLES_CONFIG); - - config.getList(config.TOPICS_CONFIG); - config.getList(config.TOPICS_TO_TABLES_CONFIG); - config.getList(config.DATASETS_CONFIG); - - config.getKeyFile(); - config.getString(config.PROJECT_CONFIG); - - config.getBoolean(config.SANITIZE_TOPICS_CONFIG); - config.getInt(config.AVRO_DATA_CACHE_SIZE_CONFIG); - } } diff --git a/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/SinkTaskPropertiesFactory.java b/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/SinkTaskPropertiesFactory.java deleted file mode 100644 index 453e05a7c..000000000 --- a/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/SinkTaskPropertiesFactory.java +++ /dev/null @@ -1,48 +0,0 @@ -package com.wepay.kafka.connect.bigquery; - -/* - * Copyright 2016 WePay, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - - -import com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig; -import com.wepay.kafka.connect.bigquery.config.BigQuerySinkTaskConfig; - -import java.util.Map; - -public class SinkTaskPropertiesFactory extends SinkPropertiesFactory { - @Override - public Map getProperties() { - Map properties = super.getProperties(); - - properties.put(BigQuerySinkTaskConfig.SCHEMA_UPDATE_CONFIG, "false"); - properties.put(BigQuerySinkConfig.TABLE_CREATE_CONFIG, "false"); - - return properties; - } - - /** - * Make sure that each of the default configuration properties work nicely with the given - * configuration object. 
- * - * @param config The config object to test - */ - public void testProperties(BigQuerySinkTaskConfig config) { - super.testProperties(config); - - config.getBoolean(config.SCHEMA_UPDATE_CONFIG); - } -} diff --git a/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/config/BigQuerySinkConfigTest.java b/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/config/BigQuerySinkConfigTest.java index d8952b0f2..4890f3716 100644 --- a/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/config/BigQuerySinkConfigTest.java +++ b/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/config/BigQuerySinkConfigTest.java @@ -1,7 +1,7 @@ -package com.wepay.kafka.connect.bigquery.config; - /* - * Copyright 2016 WePay, Inc. + * Copyright 2020 Confluent, Inc. + * + * This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,8 +17,10 @@ * under the License. */ +package com.wepay.kafka.connect.bigquery.config; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; @@ -31,8 +33,12 @@ import org.junit.Before; import org.junit.Test; +import java.util.ArrayList; +import java.util.Arrays; import java.util.HashMap; +import java.util.List; import java.util.Map; +import java.util.Optional; public class BigQuerySinkConfigTest { private SinkPropertiesFactory propertiesFactory; @@ -47,7 +53,11 @@ public void initializePropertiesFactory() { public void metaTestBasicConfigProperties() { Map basicConfigProperties = propertiesFactory.getProperties(); BigQuerySinkConfig config = new BigQuerySinkConfig(basicConfigProperties); - propertiesFactory.testProperties(config); + config.getList(BigQuerySinkConfig.TOPICS_CONFIG); + config.getString(BigQuerySinkConfig.PROJECT_CONFIG); + config.getKey(); + config.getBoolean(BigQuerySinkConfig.SANITIZE_TOPICS_CONFIG); + config.getInt(BigQuerySinkConfig.AVRO_DATA_CACHE_SIZE_CONFIG); } @Test @@ -207,4 +217,74 @@ public void testInvalidAvroCacheSize() { new BigQuerySinkConfig(badConfigProperties); } + + /** + * Test the default for the field name is not present. + */ + @Test + public void testEmptyTimestampPartitionFieldName() { + Map configProperties = propertiesFactory.getProperties(); + BigQuerySinkConfig testConfig = new BigQuerySinkConfig(configProperties); + assertFalse(testConfig.getTimestampPartitionFieldName().isPresent()); + } + + /** + * Test the field name being non-empty and the decorator set to false works correctly. + */ + @Test + public void testTimestampPartitionFieldName() { + Map configProperties = propertiesFactory.getProperties(); + configProperties.put(BigQuerySinkConfig.BIGQUERY_TIMESTAMP_PARTITION_FIELD_NAME_CONFIG, "name"); + configProperties.put(BigQuerySinkConfig.BIGQUERY_PARTITION_DECORATOR_CONFIG, "false"); + BigQuerySinkConfig testConfig = new BigQuerySinkConfig(configProperties); + assertTrue(testConfig.getTimestampPartitionFieldName().isPresent()); + assertFalse(testConfig.getBoolean(BigQuerySinkConfig.BIGQUERY_PARTITION_DECORATOR_CONFIG)); + } + + /** + * Test the default for the field names is not present. 
+ */ + @Test + public void testEmptyClusteringFieldNames() { + Map<String, String> configProperties = propertiesFactory.getProperties(); + BigQuerySinkConfig testConfig = new BigQuerySinkConfig(configProperties); + assertFalse(testConfig.getClusteringPartitionFieldNames().isPresent()); + } + + /** + * Test that more than four clustering field names cause an error. + */ + @Test (expected = ConfigException.class) + public void testClusteringPartitionFieldNamesWithMoreThanFourFieldsError() { + Map<String, String> configProperties = propertiesFactory.getProperties(); + configProperties.put(BigQuerySinkConfig.BIGQUERY_PARTITION_DECORATOR_CONFIG, "true"); + configProperties.put( + BigQuerySinkConfig.BIGQUERY_CLUSTERING_FIELD_NAMES_CONFIG, + "column1,column2,column3,column4,column5" + ); + new BigQuerySinkConfig(configProperties); + } + + /** + * Test that non-empty field names work correctly when the partitioning field is present. + */ + @Test + public void testClusteringFieldNames() { + Map<String, String> configProperties = propertiesFactory.getProperties(); + configProperties.put(BigQuerySinkConfig.BIGQUERY_TIMESTAMP_PARTITION_FIELD_NAME_CONFIG, "name"); + configProperties.put(BigQuerySinkConfig.BIGQUERY_PARTITION_DECORATOR_CONFIG, "false"); + configProperties.put( + BigQuerySinkConfig.BIGQUERY_CLUSTERING_FIELD_NAMES_CONFIG, + "column1,column2" + ); + + ArrayList<String> expectedClusteringPartitionFieldName = new ArrayList<>( + Arrays.asList("column1", "column2") + ); + + BigQuerySinkConfig testConfig = new BigQuerySinkConfig(configProperties); + Optional<List<String>> testClusteringPartitionFieldName = testConfig.getClusteringPartitionFieldNames(); + assertTrue(testClusteringPartitionFieldName.isPresent()); + assertEquals(expectedClusteringPartitionFieldName, testClusteringPartitionFieldName.get()); + } } diff --git a/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/config/BigQuerySinkTaskConfigTest.java b/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/config/BigQuerySinkTaskConfigTest.java deleted file mode 100644 index edd34bf85..000000000 --- a/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/config/BigQuerySinkTaskConfigTest.java +++ /dev/null @@ -1,174 +0,0 @@ -package com.wepay.kafka.connect.bigquery.config; - -/* - * Copyright 2016 WePay, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License.
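The clustering and timestamp-partitioning behavior pinned down by the tests above is driven entirely by connector properties. As a minimal sketch (assuming only the `BigQuerySinkConfig` constants and getters shown in this diff, that the remaining settings have defaults, and with hypothetical topic, project, and column names), the two features compose like this:

```java
import com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig;

import java.util.HashMap;
import java.util.Map;

public class PartitioningConfigSketch {
  public static void main(String[] args) {
    Map<String, String> props = new HashMap<>();
    props.put(BigQuerySinkConfig.TOPICS_CONFIG, "kcbq-test");          // hypothetical topic
    props.put(BigQuerySinkConfig.PROJECT_CONFIG, "test-project");      // hypothetical project
    props.put(BigQuerySinkConfig.DATASETS_CONFIG, "kcbq-test=scratch");
    props.put(BigQuerySinkConfig.KEYFILE_CONFIG, "key.json");
    // Field-based partitioning requires decorator syntax to be turned off.
    props.put(BigQuerySinkConfig.BIGQUERY_PARTITION_DECORATOR_CONFIG, "false");
    props.put(BigQuerySinkConfig.BIGQUERY_TIMESTAMP_PARTITION_FIELD_NAME_CONFIG, "event_time");
    // At most four clustering fields pass validation, per the test above.
    props.put(BigQuerySinkConfig.BIGQUERY_CLUSTERING_FIELD_NAMES_CONFIG, "customer_id,region");

    BigQuerySinkConfig config = new BigQuerySinkConfig(props);
    config.getTimestampPartitionFieldName()
        .ifPresent(field -> System.out.println("partition field: " + field));
    config.getClusteringPartitionFieldNames()
        .ifPresent(fields -> System.out.println("clustering fields: " + fields));
  }
}
```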
- */ - - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; - -import com.wepay.kafka.connect.bigquery.SinkTaskPropertiesFactory; - -import org.apache.kafka.common.config.ConfigException; - -import org.junit.Before; -import org.junit.Test; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.Map; -import java.util.Optional; - -public class BigQuerySinkTaskConfigTest { - private SinkTaskPropertiesFactory propertiesFactory; - - @Before - public void initializePropertiesFactory() { - propertiesFactory = new SinkTaskPropertiesFactory(); - } - - @Test - public void metaTestBasicConfigProperties() { - Map basicConfigProperties = propertiesFactory.getProperties(); - BigQuerySinkTaskConfig config = new BigQuerySinkTaskConfig(basicConfigProperties); - propertiesFactory.testProperties(config); - } - - @Test() - public void testMaxWriteSize() { - // todo: something like this, maybe. - /* - Map badProperties = propertiesFactory.getProperties(); - badProperties.put(BigQuerySinkTaskConfig.MAX_WRITE_CONFIG, "-1"); - - try { - new BigQuerySinkTaskConfig(badProperties); - } catch (ConfigException err) { - fail("Exception encountered before addition of bad configuration field: " + err); - } - - badProperties.put(BigQuerySinkTaskConfig.MAX_WRITE_CONFIG, "0"); - new BigQuerySinkTaskConfig(badProperties); - */ - } - - /** - * Test the default for the field name is not present. - */ - @Test - public void testEmptyTimestampPartitionFieldName() { - Map configProperties = propertiesFactory.getProperties(); - BigQuerySinkTaskConfig testConfig = new BigQuerySinkTaskConfig(configProperties); - assertFalse(testConfig.getTimestampPartitionFieldName().isPresent()); - } - - /** - * Test if the field name being non-empty and the decorator default (true) errors correctly. - */ - @Test (expected = ConfigException.class) - public void testTimestampPartitionFieldNameError() { - Map configProperties = propertiesFactory.getProperties(); - configProperties.put(BigQuerySinkTaskConfig.BIGQUERY_TIMESTAMP_PARTITION_FIELD_NAME_CONFIG, "name"); - new BigQuerySinkTaskConfig(configProperties); - } - - /** - * Test the field name being non-empty and the decorator set to false works correctly. - */ - @Test - public void testTimestampPartitionFieldName() { - Map configProperties = propertiesFactory.getProperties(); - configProperties.put(BigQuerySinkTaskConfig.BIGQUERY_TIMESTAMP_PARTITION_FIELD_NAME_CONFIG, "name"); - configProperties.put(BigQuerySinkTaskConfig.BIGQUERY_PARTITION_DECORATOR_CONFIG, "false"); - BigQuerySinkTaskConfig testConfig = new BigQuerySinkTaskConfig(configProperties); - assertTrue(testConfig.getTimestampPartitionFieldName().isPresent()); - assertFalse(testConfig.getBoolean(BigQuerySinkTaskConfig.BIGQUERY_PARTITION_DECORATOR_CONFIG)); - } - - /** - * Test the default for the field names is not present. - */ - @Test - public void testEmptyClusteringFieldNames() { - Map configProperties = propertiesFactory.getProperties(); - BigQuerySinkTaskConfig testConfig = new BigQuerySinkTaskConfig(configProperties); - assertFalse(testConfig.getClusteringPartitionFieldName().isPresent()); - } - - /** - * Test if the field names being non-empty and the partitioning is not present errors correctly. 
- */ - @Test (expected = ConfigException.class) - public void testClusteringFieldNamesWithoutTimestampPartitionError() { - Map<String, String> configProperties = propertiesFactory.getProperties(); - configProperties.put(BigQuerySinkTaskConfig.BIGQUERY_TIMESTAMP_PARTITION_FIELD_NAME_CONFIG, null); - configProperties.put(BigQuerySinkTaskConfig.BIGQUERY_PARTITION_DECORATOR_CONFIG, "false"); - configProperties.put( - BigQuerySinkTaskConfig.BIGQUERY_CLUSTERING_FIELD_NAMES_CONFIG, - "column1,column2" - ); - new BigQuerySinkTaskConfig(configProperties); - } - - /** - * Test if the field names are more than four fields errors correctly. - */ - @Test (expected = ConfigException.class) - public void testClusteringPartitionFieldNamesWithMoreThanFourFieldsError() { - Map<String, String> configProperties = propertiesFactory.getProperties(); - configProperties.put(BigQuerySinkTaskConfig.BIGQUERY_PARTITION_DECORATOR_CONFIG, "true"); - configProperties.put( - BigQuerySinkTaskConfig.BIGQUERY_CLUSTERING_FIELD_NAMES_CONFIG, - "column1,column2,column3,column4,column5" - ); - new BigQuerySinkTaskConfig(configProperties); - } - - /** - * Test the field names being non-empty and the partitioning field exists works correctly. - */ - @Test - public void testClusteringFieldNames() { - Map<String, String> configProperties = propertiesFactory.getProperties(); - configProperties.put(BigQuerySinkTaskConfig.BIGQUERY_TIMESTAMP_PARTITION_FIELD_NAME_CONFIG, "name"); - configProperties.put(BigQuerySinkTaskConfig.BIGQUERY_PARTITION_DECORATOR_CONFIG, "false"); - configProperties.put( - BigQuerySinkTaskConfig.BIGQUERY_CLUSTERING_FIELD_NAMES_CONFIG, - "column1,column2" - ); - - ArrayList<String> expectedClusteringPartitionFieldName = new ArrayList<>( - Arrays.asList("column1", "column2") - ); - - BigQuerySinkTaskConfig testConfig = new BigQuerySinkTaskConfig(configProperties); - Optional<List<String>> testClusteringPartitionFieldName = testConfig.getClusteringPartitionFieldName(); - assertTrue(testClusteringPartitionFieldName.isPresent()); - assertEquals(expectedClusteringPartitionFieldName, testClusteringPartitionFieldName.get()); - } - - @Test(expected = ConfigException.class) - public void testAutoSchemaUpdateWithoutRetriever() { - Map<String, String> badConfigProperties = propertiesFactory.getProperties(); - badConfigProperties.remove(BigQuerySinkTaskConfig.SCHEMA_RETRIEVER_CONFIG); - badConfigProperties.put(BigQuerySinkTaskConfig.SCHEMA_UPDATE_CONFIG, "true"); - - new BigQuerySinkTaskConfig(badConfigProperties); - } -} diff --git a/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/config/CredentialsValidatorTest.java b/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/config/CredentialsValidatorTest.java new file mode 100644 index 000000000..7a55d5ad8 --- /dev/null +++ b/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/config/CredentialsValidatorTest.java @@ -0,0 +1,69 @@ +/* + * Copyright 2020 Confluent, Inc. + * + * This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied.
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package com.wepay.kafka.connect.bigquery.config; + +import com.wepay.kafka.connect.bigquery.GcpClientBuilder; +import org.junit.Test; + +import java.util.Optional; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotEquals; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +public class CredentialsValidatorTest { + + @Test + public void testNoCredentialsSkipsValidation() { + BigQuerySinkConfig config = mock(BigQuerySinkConfig.class); + when(config.getKey()).thenReturn(null); + + assertEquals( + Optional.empty(), + new CredentialsValidator.BigQueryCredentialsValidator().doValidate(config) + ); + assertEquals( + Optional.empty(), + new CredentialsValidator.GcsCredentialsValidator().doValidate(config) + ); + } + + @Test + public void testFailureToConstructClient() { + BigQuerySinkConfig config = mock(BigQuerySinkConfig.class); + when(config.getKey()).thenReturn("key"); + + @SuppressWarnings("unchecked") + GcpClientBuilder mockClientBuilder = mock(GcpClientBuilder.class); + when(mockClientBuilder.withConfig(eq(config))).thenReturn(mockClientBuilder); + when(mockClientBuilder.build()).thenThrow(new RuntimeException("Provided credentials are invalid")); + + assertNotEquals( + Optional.empty(), + new CredentialsValidator.BigQueryCredentialsValidator().doValidate(config) + ); + assertNotEquals( + Optional.empty(), + new CredentialsValidator.GcsCredentialsValidator().doValidate(config) + ); + } +} diff --git a/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/config/GcsBucketValidatorTest.java b/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/config/GcsBucketValidatorTest.java new file mode 100644 index 000000000..d46832678 --- /dev/null +++ b/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/config/GcsBucketValidatorTest.java @@ -0,0 +1,93 @@ +/* + * Copyright 2020 Confluent, Inc. + * + * This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
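A note on what `CredentialsValidatorTest` above is really pinning down: the validator drives the fluent `GcpClientBuilder` contract (`withConfig(...)` then `build()`) and maps any construction failure to an error message. A plausible reading of that contract, sketched under the assumption that exceptions are the only failure signal (the helper class below is hypothetical):

```java
import com.wepay.kafka.connect.bigquery.GcpClientBuilder;
import com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig;

import java.util.Optional;

public class CredentialsCheckSketch {
  // Empty result means the credentials are usable; a message means validation failed.
  static Optional<String> validate(GcpClientBuilder<?> builder, BigQuerySinkConfig config) {
    if (config.getKey() == null) {
      return Optional.empty(); // no key configured: nothing to validate, per the first test
    }
    try {
      builder.withConfig(config).build(); // try to construct a client with the configured key
      return Optional.empty();
    } catch (RuntimeException e) {
      return Optional.of("Failed to construct GCP client: " + e.getMessage());
    }
  }
}
```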
+ */ + +package com.wepay.kafka.connect.bigquery.config; + +import org.junit.Test; + +import java.util.Collections; +import java.util.Optional; + +import static com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig.ENABLE_BATCH_CONFIG; +import static com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig.GCS_BUCKET_NAME_CONFIG; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotEquals; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +public class GcsBucketValidatorTest { + + @Test + public void testNullBatchLoadingSkipsValidation() { + BigQuerySinkConfig config = mock(BigQuerySinkConfig.class); + when(config.getList(ENABLE_BATCH_CONFIG)).thenReturn(null); + + assertEquals( + Optional.empty(), + new GcsBucketValidator().doValidate(config) + ); + } + + @Test + public void testEmptyBatchLoadingSkipsValidation() { + BigQuerySinkConfig config = mock(BigQuerySinkConfig.class); + when(config.getList(ENABLE_BATCH_CONFIG)).thenReturn(Collections.emptyList()); + + assertEquals( + Optional.empty(), + new GcsBucketValidator().doValidate(config) + ); + } + + @Test + public void testNullBucketWithBatchLoading() { + BigQuerySinkConfig config = mock(BigQuerySinkConfig.class); + when(config.getList(ENABLE_BATCH_CONFIG)).thenReturn(Collections.singletonList("t1")); + when(config.getString(GCS_BUCKET_NAME_CONFIG)).thenReturn(null); + + assertNotEquals( + Optional.empty(), + new GcsBucketValidator().doValidate(config) + ); + } + + @Test + public void testBlankBucketWithBatchLoading() { + BigQuerySinkConfig config = mock(BigQuerySinkConfig.class); + when(config.getList(ENABLE_BATCH_CONFIG)).thenReturn(Collections.singletonList("t1")); + when(config.getString(GCS_BUCKET_NAME_CONFIG)).thenReturn(" \t "); + + assertNotEquals( + Optional.empty(), + new GcsBucketValidator().doValidate(config) + ); + } + + @Test + public void testValidBucketWithBatchLoading() { + BigQuerySinkConfig config = mock(BigQuerySinkConfig.class); + when(config.getList(ENABLE_BATCH_CONFIG)).thenReturn(Collections.singletonList("t1")); + when(config.getString(GCS_BUCKET_NAME_CONFIG)).thenReturn("gee_cs"); + + assertEquals( + Optional.empty(), + new GcsBucketValidator().doValidate(config) + ); + } +} diff --git a/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/config/MultiPropertyValidatorTest.java b/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/config/MultiPropertyValidatorTest.java new file mode 100644 index 000000000..205bb56a3 --- /dev/null +++ b/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/config/MultiPropertyValidatorTest.java @@ -0,0 +1,138 @@ +/* + * Copyright 2020 Confluent, Inc. + * + * This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
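The `GcsBucketValidator` tests above show that the bucket check only engages when at least one topic is configured for batch loading; once batch loading is on, a null or blank bucket name is rejected. A minimal properties sketch (constants as imported in the test above; topic and bucket names hypothetical):

```java
import static com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig.ENABLE_BATCH_CONFIG;
import static com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig.GCS_BUCKET_NAME_CONFIG;

import java.util.HashMap;
import java.util.Map;

public class GcsBatchLoadSketch {
  public static void main(String[] args) {
    Map<String, String> props = new HashMap<>();
    // Opting a topic into batch loading makes the bucket name mandatory...
    props.put(ENABLE_BATCH_CONFIG, "gcs-topic");       // hypothetical topic
    // ...so a non-blank bucket must accompany it to pass validation.
    props.put(GCS_BUCKET_NAME_CONFIG, "kcbq-staging"); // hypothetical bucket
  }
}
```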
+ */ + +package com.wepay.kafka.connect.bigquery.config; + +import com.google.common.collect.ImmutableMap; +import org.apache.kafka.common.config.ConfigValue; +import org.junit.Test; + +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.function.Function; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotEquals; +import static org.junit.Assert.fail; + +public class MultiPropertyValidatorTest { + + private static class TestValidator<Config> extends MultiPropertyValidator<Config> { + + private final List<String> dependents; + private final Function<Config, Optional<String>> validationFunction; + + public TestValidator(String propertyName, List<String> dependents, Function<Config, Optional<String>> validationFunction) { + super(propertyName); + this.dependents = dependents; + this.validationFunction = validationFunction; + } + + @Override + protected Collection<String> dependents() { + return dependents; + } + + @Override + protected Optional<String> doValidate(Config config) { + return validationFunction.apply(config); + } + } + + @Test + public void testExistingErrorSkipsValidation() { + MultiPropertyValidator<Object> validator = new TestValidator<>( + "p", + Arrays.asList("d1", "d2", "d3"), + o -> { + fail("Validation should not have been performed on property that already has an error"); + return null; + } + ); + + ConfigValue configValue = new ConfigValue("p", "v", Collections.emptyList(), Collections.singletonList("an error")); + + assertEquals( + Optional.empty(), + validator.validate(configValue, null, Collections.emptyMap()) + ); + } + + @Test + public void testDependentErrorSkipsValidation() { + MultiPropertyValidator<Object> validator = new TestValidator<>( + "p", + Arrays.asList("d1", "d2", "d3"), + o -> { + fail("Validation should not have been performed on property whose dependent already has an error"); + return null; + } + ); + + ConfigValue configValue = new ConfigValue("p", "v", Collections.emptyList(), Collections.emptyList()); + Map<String, ConfigValue> valuesByName = ImmutableMap.of( + "d1", new ConfigValue("d1", "v1", Collections.emptyList(), Collections.emptyList()), + "d2", new ConfigValue("d2", "v1", Collections.emptyList(), Collections.singletonList("an error")) + ); + + assertEquals( + Optional.empty(), + validator.validate(configValue, null, valuesByName) + ); + } + + @Test + public void testValidationFails() { + Optional<String> expectedError = Optional.of("an error"); + MultiPropertyValidator<Object> validator = new TestValidator<>( + "p", + Collections.emptyList(), + o -> expectedError + ); + + ConfigValue configValue = new ConfigValue("p", "v", Collections.emptyList(), Collections.emptyList()); + + assertEquals( + expectedError, + validator.validate(configValue, null, Collections.emptyMap()) + ); + } + + @Test + public void testUnexpectedErrorDuringValidation() { + MultiPropertyValidator<Object> validator = new TestValidator<>( + "p", + Collections.emptyList(), + o -> { + throw new RuntimeException("Some unexpected error"); + } + ); + + ConfigValue configValue = new ConfigValue("p", "v", Collections.emptyList(), Collections.emptyList()); + + assertNotEquals( + Optional.empty(), + validator.validate(configValue, null, Collections.emptyMap()) + ); + } +} diff --git a/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/config/PartitioningModeValidatorTest.java b/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/config/PartitioningModeValidatorTest.java new file mode 100644 index 000000000..a4b79a14c --- /dev/null +++
b/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/config/PartitioningModeValidatorTest.java @@ -0,0 +1,80 @@ +/* + * Copyright 2020 Confluent, Inc. + * + * This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package com.wepay.kafka.connect.bigquery.config; + +import org.junit.Test; + +import java.util.Optional; + +import static com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig.BIGQUERY_PARTITION_DECORATOR_CONFIG; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotEquals; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +public class PartitioningModeValidatorTest { + + @Test + public void testDisabledDecoratorSyntaxSkipsValidation() { + BigQuerySinkConfig config = mock(BigQuerySinkConfig.class); + when(config.getBoolean(BIGQUERY_PARTITION_DECORATOR_CONFIG)).thenReturn(false); + + assertEquals( + Optional.empty(), + new PartitioningModeValidator().doValidate(config) + ); + } + + @Test + public void testDecoratorSyntaxWithoutTimestampPartitionFieldName() { + BigQuerySinkConfig config = mock(BigQuerySinkConfig.class); + when(config.getBoolean(BIGQUERY_PARTITION_DECORATOR_CONFIG)).thenReturn(true); + when(config.getTimestampPartitionFieldName()).thenReturn(Optional.empty()); + + assertEquals( + Optional.empty(), + new PartitioningModeValidator().doValidate(config) + ); + } + + @Test + public void testDecoratorSyntaxWithTimestampPartitionFieldName() { + BigQuerySinkConfig config = mock(BigQuerySinkConfig.class); + when(config.getBoolean(BIGQUERY_PARTITION_DECORATOR_CONFIG)).thenReturn(true); + when(config.getTimestampPartitionFieldName()).thenReturn(Optional.of("f1")); + + assertNotEquals( + Optional.empty(), + new PartitioningModeValidator().doValidate(config) + ); + } + + @Test + public void testTimestampPartitionFieldNameWithoutDecoratorSyntax() { + BigQuerySinkConfig config = mock(BigQuerySinkConfig.class); + when(config.getBoolean(BIGQUERY_PARTITION_DECORATOR_CONFIG)).thenReturn(false); + when(config.getTimestampPartitionFieldName()).thenReturn(Optional.of("f1")); + + assertEquals( + Optional.empty(), + new PartitioningModeValidator().doValidate(config) + ); + } +} diff --git a/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/config/SchemaRetrieverValidatorTest.java b/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/config/SchemaRetrieverValidatorTest.java new file mode 100644 index 000000000..19a1ae28b --- /dev/null +++ b/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/config/SchemaRetrieverValidatorTest.java @@ -0,0 +1,107 @@ +/* + * Copyright 2020 Confluent, Inc. + * + * This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
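The `TestValidator` harness above exercises the whole `MultiPropertyValidator` surface: a property name, a set of dependents whose existing errors suppress the check, and a `doValidate` hook. A sketch of a concrete validator in the same mold (the class, property keys, and constraint are hypothetical illustrations, not part of this patch):

```java
package com.wepay.kafka.connect.bigquery.config;

import java.util.Collection;
import java.util.Collections;
import java.util.Optional;

// Hypothetical validator: cross-checks the retry count against its companion wait setting.
public class RetrySettingsValidator extends MultiPropertyValidator<BigQuerySinkConfig> {

  public RetrySettingsValidator() {
    super("bigQueryRetry"); // hypothetical property key
  }

  @Override
  protected Collection<String> dependents() {
    // If a dependent already failed validation, this check is skipped (second test above).
    return Collections.singletonList("bigQueryRetryWait"); // hypothetical dependent
  }

  @Override
  protected Optional<String> doValidate(BigQuerySinkConfig config) {
    // Optional.empty() signals success; a message signals failure.
    return config.getInt("bigQueryRetry") >= 0
        ? Optional.empty()
        : Optional.of("retry count must be non-negative");
  }
}
```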
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package com.wepay.kafka.connect.bigquery.config; + +import com.wepay.kafka.connect.bigquery.api.SchemaRetriever; +import org.junit.Test; + +import java.util.Optional; + +import static com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig.SCHEMA_UPDATE_CONFIG; +import static com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig.TABLE_CREATE_CONFIG; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotEquals; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +public class SchemaRetrieverValidatorTest { + + @Test + public void testDisabledTableCreationSkipsValidation() { + BigQuerySinkConfig config = mock(BigQuerySinkConfig.class); + when(config.getBoolean(TABLE_CREATE_CONFIG)).thenReturn(false); + + assertEquals( + Optional.empty(), + new SchemaRetrieverValidator.TableCreationValidator().doValidate(config) + ); + } + + @Test + public void testDisabledSchemaUpdatesSkipsValidation() { + BigQuerySinkConfig config = mock(BigQuerySinkConfig.class); + when(config.getBoolean(TABLE_CREATE_CONFIG)).thenReturn(true); + + assertEquals( + Optional.empty(), + new SchemaRetrieverValidator.SchemaUpdateValidator().doValidate(config) + ); + } + + @Test + public void testTableCreationEnabledWithNoRetriever() { + BigQuerySinkConfig config = mock(BigQuerySinkConfig.class); + when(config.getBoolean(TABLE_CREATE_CONFIG)).thenReturn(true); + when(config.getSchemaRetriever()).thenReturn(null); + + assertNotEquals( + Optional.empty(), + new SchemaRetrieverValidator.TableCreationValidator().doValidate(config) + ); + } + + @Test + public void testSchemaUpdatesEnabledWithNoRetriever() { + BigQuerySinkConfig config = mock(BigQuerySinkConfig.class); + when(config.getBoolean(SCHEMA_UPDATE_CONFIG)).thenReturn(true); + when(config.getSchemaRetriever()).thenReturn(null); + + assertNotEquals( + Optional.empty(), + new SchemaRetrieverValidator.SchemaUpdateValidator().doValidate(config) + ); + } + + @Test + public void testTableCreationEnabledWithValidRetriever() { + BigQuerySinkConfig config = mock(BigQuerySinkConfig.class); + when(config.getBoolean(TABLE_CREATE_CONFIG)).thenReturn(true); + SchemaRetriever mockRetriever = mock(SchemaRetriever.class); + when(config.getSchemaRetriever()).thenReturn(mockRetriever); + + assertEquals( + Optional.empty(), + new SchemaRetrieverValidator.TableCreationValidator().doValidate(config) + ); + } + + @Test + public void testSchemaUpdatesEnabledWithValidRetriever() { + BigQuerySinkConfig config = mock(BigQuerySinkConfig.class); + when(config.getBoolean(SCHEMA_UPDATE_CONFIG)).thenReturn(true); + SchemaRetriever mockRetriever = mock(SchemaRetriever.class); + when(config.getSchemaRetriever()).thenReturn(mockRetriever); + + assertEquals( + Optional.empty(), + new SchemaRetrieverValidator.SchemaUpdateValidator().doValidate(config) + ); + } +} diff --git a/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/config/TableExistenceValidatorTest.java b/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/config/TableExistenceValidatorTest.java new file mode 100644 index 
000000000..fb9ffc32f --- /dev/null +++ b/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/config/TableExistenceValidatorTest.java @@ -0,0 +1,160 @@ +/* + * Copyright 2020 Confluent, Inc. + * + * This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package com.wepay.kafka.connect.bigquery.config; + +import com.google.cloud.bigquery.BigQuery; +import com.google.cloud.bigquery.Table; +import com.google.cloud.bigquery.TableId; +import com.google.common.collect.ImmutableMap; +import org.junit.Test; + +import java.util.Arrays; +import java.util.Collections; +import java.util.HashSet; +import java.util.Optional; +import java.util.Set; +import java.util.stream.Stream; + +import static com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig.TABLE_CREATE_CONFIG; +import static com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig.TOPICS_CONFIG; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotEquals; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +public class TableExistenceValidatorTest { + + @Test + public void testMissingTableWithAutoCreationDisabled() { + BigQuerySinkConfig config = mock(BigQuerySinkConfig.class); + when(config.getTopicsToDatasets()) + .thenReturn(ImmutableMap.of( + "t1", "d1", + "t2", "d2" + )); + when(config.getBoolean(eq(TABLE_CREATE_CONFIG))).thenReturn(false); + when(config.getList(TOPICS_CONFIG)).thenReturn(Arrays.asList("t1", "t2")); + + BigQuery bigQuery = bigQuery(TableId.of("d1", "t1")); + + assertNotEquals( + Optional.empty(), + new TableExistenceValidator().doValidate(bigQuery, config) + ); + } + + @Test + public void testEmptyTopicsListWithAutoCreationDisabled() { + BigQuerySinkConfig config = mock(BigQuerySinkConfig.class); + when(config.getTopicsToDatasets()) + .thenReturn(ImmutableMap.of( + "t1", "d1", + "t2", "d2" + )); + when(config.getBoolean(eq(TABLE_CREATE_CONFIG))).thenReturn(false); + when(config.getList(TOPICS_CONFIG)).thenReturn(Collections.emptyList()); + + BigQuery bigQuery = bigQuery(); + + assertEquals( + Optional.empty(), + new TableExistenceValidator().doValidate(bigQuery, config) + ); + } + + @Test + public void testMissingTableWithAutoCreationEnabled() { + BigQuerySinkConfig config = mock(BigQuerySinkConfig.class); + when(config.getBoolean(eq(TABLE_CREATE_CONFIG))).thenReturn(true); + + assertEquals( + Optional.empty(), + new TableExistenceValidator().doValidate(null, config) + ); + } + + @Test + public void testExactListOfMissingTables() { + BigQuerySinkConfig config = mock(BigQuerySinkConfig.class); + when(config.getTopicsToDatasets()) + .thenReturn(ImmutableMap.of( + "t1", "d1", + "t2", "d2", + "t3", "d1", + "t4", "d2", + "t5", "d1" + )); + when(config.getList(TOPICS_CONFIG)).thenReturn(Arrays.asList("t1", "t2", "t3", "t4", "t5")); + + BigQuery bigQuery = bigQuery( + TableId.of("d1", "t1"), 
+ TableId.of("d3", "t2"), + TableId.of("d2", "t5") + ); + Set expectedMissingTables = new HashSet<>(Arrays.asList( + TableId.of("d2", "t2"), + TableId.of("d1", "t3"), + TableId.of("d2", "t4"), + TableId.of("d1", "t5") + )); + + assertEquals( + expectedMissingTables, + new HashSet<>(new TableExistenceValidator().missingTables(bigQuery, config)) + ); + } + + @Test + public void testExactEmptyListOfMissingTables() { + BigQuerySinkConfig config = mock(BigQuerySinkConfig.class); + when(config.getTopicsToDatasets()) + .thenReturn(ImmutableMap.of( + "t1", "d1", + "t2", "d2", + "t3", "d1", + "t4", "d2", + "t5", "d1" + )); + when(config.getList(TOPICS_CONFIG)).thenReturn(Arrays.asList("t1", "t2", "t3", "t4", "t5")); + + BigQuery bigQuery = bigQuery( + TableId.of("d1", "t1"), + TableId.of("d2", "t2"), + TableId.of("d1", "t3"), + TableId.of("d2", "t4"), + TableId.of("d1", "t5") + ); + + assertEquals( + Collections.emptyList(), + new TableExistenceValidator().missingTables(bigQuery, config) + ); + } + + private static BigQuery bigQuery(TableId... existingTables) { + BigQuery result = mock(BigQuery.class); + Stream.of(existingTables).forEach(table -> { + Table mockTable = mock(Table.class); + when(result.getTable(eq(table))).thenReturn(mockTable); + }); + return result; + } +} diff --git a/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/convert/BigQueryRecordConverterTest.java b/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/convert/BigQueryRecordConverterTest.java index 23bdd6a63..1513b077b 100644 --- a/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/convert/BigQueryRecordConverterTest.java +++ b/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/convert/BigQueryRecordConverterTest.java @@ -1,7 +1,7 @@ -package com.wepay.kafka.connect.bigquery.convert; - /* - * Copyright 2016 WePay, Inc. + * Copyright 2020 Confluent, Inc. + * + * This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,6 +17,7 @@ * under the License. 
*/ +package com.wepay.kafka.connect.bigquery.convert; import static org.junit.Assert.assertEquals; @@ -45,11 +46,12 @@ public class BigQueryRecordConverterTest { private static final Boolean SHOULD_CONVERT_DOUBLE = true; + private static Boolean SHOULD_CONVERT_DEBEZIUM_TIMESTAMP_TO_INTEGER = false; @Test(expected = ConversionConnectException.class) public void testTopLevelRecord() { SinkRecord kafkaConnectRecord = spoofSinkRecord(Schema.BOOLEAN_SCHEMA, false, false); - new BigQueryRecordConverter(SHOULD_CONVERT_DOUBLE).convertRecord(kafkaConnectRecord, KafkaSchemaRecordType.VALUE); + new BigQueryRecordConverter(SHOULD_CONVERT_DOUBLE, SHOULD_CONVERT_DEBEZIUM_TIMESTAMP_TO_INTEGER).convertRecord(kafkaConnectRecord, KafkaSchemaRecordType.VALUE); } @Test @@ -70,7 +72,7 @@ public void testBoolean() { SinkRecord kafkaConnectRecord = spoofSinkRecord(kafkaConnectSchema, kafkaConnectStruct, true); Map bigQueryTestRecord = - new BigQueryRecordConverter(SHOULD_CONVERT_DOUBLE).convertRecord(kafkaConnectRecord, KafkaSchemaRecordType.KEY); + new BigQueryRecordConverter(SHOULD_CONVERT_DOUBLE, SHOULD_CONVERT_DEBEZIUM_TIMESTAMP_TO_INTEGER).convertRecord(kafkaConnectRecord, KafkaSchemaRecordType.KEY); assertEquals(bigQueryExpectedRecord, bigQueryTestRecord); } @@ -92,7 +94,7 @@ public void testInteger() { SinkRecord kafkaConnectRecord = spoofSinkRecord(kafkaConnectSchema, kafkaConnectStruct, false); Map bigQueryTestRecord = - new BigQueryRecordConverter(SHOULD_CONVERT_DOUBLE).convertRecord(kafkaConnectRecord, KafkaSchemaRecordType.VALUE); + new BigQueryRecordConverter(SHOULD_CONVERT_DOUBLE, SHOULD_CONVERT_DEBEZIUM_TIMESTAMP_TO_INTEGER).convertRecord(kafkaConnectRecord, KafkaSchemaRecordType.VALUE); assertEquals(bigQueryExpectedRecord, bigQueryTestRecord); final Short fieldShortValue = (short) 4242; @@ -109,7 +111,7 @@ public void testInteger() { kafkaConnectRecord = spoofSinkRecord(kafkaConnectSchema, kafkaConnectStruct, true); bigQueryTestRecord = - new BigQueryRecordConverter(SHOULD_CONVERT_DOUBLE).convertRecord(kafkaConnectRecord, KafkaSchemaRecordType.KEY); + new BigQueryRecordConverter(SHOULD_CONVERT_DOUBLE, SHOULD_CONVERT_DEBEZIUM_TIMESTAMP_TO_INTEGER).convertRecord(kafkaConnectRecord, KafkaSchemaRecordType.KEY); assertEquals(bigQueryExpectedRecord, bigQueryTestRecord); final Integer fieldIntegerValue = 424242; @@ -126,7 +128,7 @@ public void testInteger() { kafkaConnectRecord = spoofSinkRecord(kafkaConnectSchema, kafkaConnectStruct, false); bigQueryTestRecord = - new BigQueryRecordConverter(SHOULD_CONVERT_DOUBLE).convertRecord(kafkaConnectRecord, KafkaSchemaRecordType.VALUE); + new BigQueryRecordConverter(SHOULD_CONVERT_DOUBLE, SHOULD_CONVERT_DEBEZIUM_TIMESTAMP_TO_INTEGER).convertRecord(kafkaConnectRecord, KafkaSchemaRecordType.VALUE); assertEquals(bigQueryExpectedRecord, bigQueryTestRecord); final Long fieldLongValue = 424242424242L; @@ -143,7 +145,7 @@ public void testInteger() { kafkaConnectRecord = spoofSinkRecord(kafkaConnectSchema, kafkaConnectStruct, true); bigQueryTestRecord = - new BigQueryRecordConverter(SHOULD_CONVERT_DOUBLE).convertRecord(kafkaConnectRecord, KafkaSchemaRecordType.KEY); + new BigQueryRecordConverter(SHOULD_CONVERT_DOUBLE, SHOULD_CONVERT_DEBEZIUM_TIMESTAMP_TO_INTEGER).convertRecord(kafkaConnectRecord, KafkaSchemaRecordType.KEY); assertEquals(bigQueryExpectedRecord, bigQueryTestRecord); } @@ -164,7 +166,7 @@ public void testInteger() { SinkRecord kafkaConnectRecord = spoofSinkRecord(kafkaConnectSchema, kafkaConnectStruct, false); Map bigQueryTestRecord = - new 
BigQueryRecordConverter(SHOULD_CONVERT_DOUBLE).convertRecord(kafkaConnectRecord, KafkaSchemaRecordType.VALUE); + new BigQueryRecordConverter(SHOULD_CONVERT_DOUBLE, SHOULD_CONVERT_DEBEZIUM_TIMESTAMP_TO_INTEGER).convertRecord(kafkaConnectRecord, KafkaSchemaRecordType.VALUE); assertEquals(bigQueryExpectedRecord, bigQueryTestRecord); final Double fieldDoubleValue = 4242424242.4242; @@ -182,7 +184,7 @@ public void testInteger() { kafkaConnectRecord = spoofSinkRecord(kafkaConnectSchema, kafkaConnectStruct, true); bigQueryTestRecord = - new BigQueryRecordConverter(SHOULD_CONVERT_DOUBLE).convertRecord(kafkaConnectRecord, KafkaSchemaRecordType.KEY); + new BigQueryRecordConverter(SHOULD_CONVERT_DOUBLE, SHOULD_CONVERT_DEBEZIUM_TIMESTAMP_TO_INTEGER).convertRecord(kafkaConnectRecord, KafkaSchemaRecordType.KEY); assertEquals(bigQueryExpectedRecord, bigQueryTestRecord); } @@ -209,7 +211,7 @@ public void testInteger() { SinkRecord kafkaConnectRecord = spoofSinkRecord(kafkaConnectSchema, kafkaConnectStruct, false); Map bigQueryTestRecord = - new BigQueryRecordConverter(SHOULD_CONVERT_DOUBLE).convertRecord(kafkaConnectRecord, KafkaSchemaRecordType.VALUE); + new BigQueryRecordConverter(SHOULD_CONVERT_DOUBLE, SHOULD_CONVERT_DEBEZIUM_TIMESTAMP_TO_INTEGER).convertRecord(kafkaConnectRecord, KafkaSchemaRecordType.VALUE); assertEquals(bigQueryExpectedRecord, bigQueryTestRecord); } } @@ -232,7 +234,7 @@ public void testString() { SinkRecord kafkaConnectRecord = spoofSinkRecord(kafkaConnectSchema, kafkaConnectStruct, true); Map bigQueryTestRecord = - new BigQueryRecordConverter(SHOULD_CONVERT_DOUBLE).convertRecord(kafkaConnectRecord, KafkaSchemaRecordType.KEY); + new BigQueryRecordConverter(SHOULD_CONVERT_DOUBLE, SHOULD_CONVERT_DEBEZIUM_TIMESTAMP_TO_INTEGER).convertRecord(kafkaConnectRecord, KafkaSchemaRecordType.KEY); assertEquals(bigQueryExpectedRecord, bigQueryTestRecord); } @@ -264,7 +266,7 @@ public void testStruct() { SinkRecord kafkaConnectInnerSinkRecord = spoofSinkRecord(kafkaConnectInnerSchema, kafkaConnectInnerStruct, false); Map bigQueryTestInnerRecord = - new BigQueryRecordConverter(SHOULD_CONVERT_DOUBLE) + new BigQueryRecordConverter(SHOULD_CONVERT_DOUBLE, SHOULD_CONVERT_DEBEZIUM_TIMESTAMP_TO_INTEGER) .convertRecord(kafkaConnectInnerSinkRecord, KafkaSchemaRecordType.VALUE); assertEquals(bigQueryExpectedInnerRecord, bigQueryTestInnerRecord); @@ -286,7 +288,7 @@ public void testStruct() { SinkRecord kafkaConnectMiddleSinkRecord = spoofSinkRecord(kafkaConnectMiddleSchema, kafkaConnectMiddleStruct, true); Map bigQueryTestMiddleRecord = - new BigQueryRecordConverter(SHOULD_CONVERT_DOUBLE) + new BigQueryRecordConverter(SHOULD_CONVERT_DOUBLE, SHOULD_CONVERT_DEBEZIUM_TIMESTAMP_TO_INTEGER) .convertRecord(kafkaConnectMiddleSinkRecord, KafkaSchemaRecordType.KEY); assertEquals(bigQueryExpectedMiddleRecord, bigQueryTestMiddleRecord); @@ -308,7 +310,7 @@ public void testStruct() { SinkRecord kafkaConnectOuterSinkRecord = spoofSinkRecord(kafkaConnectOuterSchema, kafkaConnectOuterStruct, false); Map bigQueryTestOuterRecord = - new BigQueryRecordConverter(SHOULD_CONVERT_DOUBLE) + new BigQueryRecordConverter(SHOULD_CONVERT_DOUBLE, SHOULD_CONVERT_DEBEZIUM_TIMESTAMP_TO_INTEGER) .convertRecord(kafkaConnectOuterSinkRecord, KafkaSchemaRecordType.VALUE); assertEquals(bigQueryExpectedOuterRecord, bigQueryTestOuterRecord); } @@ -324,7 +326,7 @@ public void testEmptyStruct() { SinkRecord kafkaConnectSinkRecord = spoofSinkRecord(kafkaConnectInnerSchema, kafkaConnectInnerStruct, false); Map bigQueryTestInnerRecord = - new 
BigQueryRecordConverter(SHOULD_CONVERT_DOUBLE) + new BigQueryRecordConverter(SHOULD_CONVERT_DOUBLE, SHOULD_CONVERT_DEBEZIUM_TIMESTAMP_TO_INTEGER) .convertRecord(kafkaConnectSinkRecord, KafkaSchemaRecordType.VALUE); assertEquals(new HashMap(), bigQueryTestInnerRecord); } @@ -357,7 +359,7 @@ public void testEmptyInnerStruct() { SinkRecord kafkaConnectOuterSinkRecord = spoofSinkRecord(kafkaConnectOuterSchema, kafkaConnectOuterStruct, false); Map bigQueryTestOuterRecord = - new BigQueryRecordConverter(SHOULD_CONVERT_DOUBLE) + new BigQueryRecordConverter(SHOULD_CONVERT_DOUBLE, SHOULD_CONVERT_DEBEZIUM_TIMESTAMP_TO_INTEGER) .convertRecord(kafkaConnectOuterSinkRecord, KafkaSchemaRecordType.VALUE); assertEquals(bigQueryExpectedOuterRecord, bigQueryTestOuterRecord); @@ -397,7 +399,7 @@ public void testMap() { SinkRecord kafkaConnectRecord = spoofSinkRecord(kafkaConnectSchema, kafkaConnectStruct, true); Map bigQueryTestRecord = - new BigQueryRecordConverter(SHOULD_CONVERT_DOUBLE).convertRecord(kafkaConnectRecord, KafkaSchemaRecordType.KEY); + new BigQueryRecordConverter(SHOULD_CONVERT_DOUBLE, SHOULD_CONVERT_DEBEZIUM_TIMESTAMP_TO_INTEGER).convertRecord(kafkaConnectRecord, KafkaSchemaRecordType.KEY); assertEquals(bigQueryExpectedRecord, bigQueryTestRecord); } @@ -419,7 +421,7 @@ public void testIntegerArray() { SinkRecord kafkaConnectRecord = spoofSinkRecord(kafkaConnectSchema, kafkaConnectStruct, false); Map bigQueryTestRecord = - new BigQueryRecordConverter(SHOULD_CONVERT_DOUBLE).convertRecord(kafkaConnectRecord, KafkaSchemaRecordType.VALUE); + new BigQueryRecordConverter(SHOULD_CONVERT_DOUBLE, SHOULD_CONVERT_DEBEZIUM_TIMESTAMP_TO_INTEGER).convertRecord(kafkaConnectRecord, KafkaSchemaRecordType.VALUE); assertEquals(bigQueryExpectedRecord, bigQueryTestRecord); } @@ -446,7 +448,7 @@ public void testStructArray() { SinkRecord kafkaConnectInnerSinkRecord = spoofSinkRecord(kafkaConnectInnerSchema, kafkaConnectInnerStruct, true); Map bigQueryTestInnerRecord = - new BigQueryRecordConverter(SHOULD_CONVERT_DOUBLE) + new BigQueryRecordConverter(SHOULD_CONVERT_DOUBLE, SHOULD_CONVERT_DEBEZIUM_TIMESTAMP_TO_INTEGER) .convertRecord(kafkaConnectInnerSinkRecord, KafkaSchemaRecordType.KEY); assertEquals(bigQueryExpectedInnerRecord, bigQueryTestInnerRecord); @@ -467,7 +469,7 @@ public void testStructArray() { SinkRecord kafkaConnectRecord = spoofSinkRecord(kafkaConnectSchema, kafkaConnectStruct, false); Map bigQueryTestRecord = - new BigQueryRecordConverter(SHOULD_CONVERT_DOUBLE).convertRecord(kafkaConnectRecord, KafkaSchemaRecordType.VALUE); + new BigQueryRecordConverter(SHOULD_CONVERT_DOUBLE, SHOULD_CONVERT_DEBEZIUM_TIMESTAMP_TO_INTEGER).convertRecord(kafkaConnectRecord, KafkaSchemaRecordType.VALUE); assertEquals(bigQueryExpectedRecord, bigQueryTestRecord); } @@ -490,7 +492,7 @@ public void testStringArray() { SinkRecord kafkaConnectRecord = spoofSinkRecord(kafkaConnectSchema, kafkaConnectStruct, true); Map bigQueryTestRecord = - new BigQueryRecordConverter(SHOULD_CONVERT_DOUBLE).convertRecord(kafkaConnectRecord, KafkaSchemaRecordType.KEY); + new BigQueryRecordConverter(SHOULD_CONVERT_DOUBLE, SHOULD_CONVERT_DEBEZIUM_TIMESTAMP_TO_INTEGER).convertRecord(kafkaConnectRecord, KafkaSchemaRecordType.KEY); assertEquals(bigQueryExpectedRecord, bigQueryTestRecord); } @@ -514,12 +516,13 @@ public void testBytes() { SinkRecord kafkaConnectRecord = spoofSinkRecord(kafkaConnectSchema, kafkaConnectStruct, false); Map bigQueryTestRecord = - new BigQueryRecordConverter(SHOULD_CONVERT_DOUBLE).convertRecord(kafkaConnectRecord, 
KafkaSchemaRecordType.VALUE);
+        new BigQueryRecordConverter(SHOULD_CONVERT_DOUBLE, SHOULD_CONVERT_DEBEZIUM_TIMESTAMP_TO_INTEGER).convertRecord(kafkaConnectRecord, KafkaSchemaRecordType.VALUE);
     assertEquals(bigQueryExpectedRecord, bigQueryTestRecord);
   }

   @Test
   public void testDebeziumLogicalType() {
+    // Test-1
     final String fieldName = "DebeziumDate";
     final int fieldDate = 17226;
@@ -536,7 +539,50 @@
     SinkRecord kafkaConnectRecord = spoofSinkRecord(kafkaConnectSchema, kafkaConnectStruct, true);

     Map bigQueryTestRecord =
-        new BigQueryRecordConverter(SHOULD_CONVERT_DOUBLE).convertRecord(kafkaConnectRecord, KafkaSchemaRecordType.KEY);
+        new BigQueryRecordConverter(SHOULD_CONVERT_DOUBLE, SHOULD_CONVERT_DEBEZIUM_TIMESTAMP_TO_INTEGER).convertRecord(kafkaConnectRecord, KafkaSchemaRecordType.KEY);
+    assertEquals(bigQueryExpectedRecord, bigQueryTestRecord);
+
+    // Test-2
+    String timeStampFieldName = "DebeziumTimestamp";
+    long fieldValue = 1611854944000L;
+
+    bigQueryExpectedRecord = new HashMap<>();
+    bigQueryExpectedRecord.put(timeStampFieldName, "2021-01-28 17:29:04.000");
+
+    kafkaConnectSchema = SchemaBuilder
+        .struct()
+        .field(timeStampFieldName, io.debezium.time.Timestamp.schema())
+        .build();
+
+    kafkaConnectStruct = new Struct(kafkaConnectSchema);
+    kafkaConnectStruct.put(timeStampFieldName, fieldValue);
+    kafkaConnectRecord = spoofSinkRecord(kafkaConnectSchema, kafkaConnectStruct, true);
+
+    bigQueryTestRecord =
+        new BigQueryRecordConverter(SHOULD_CONVERT_DOUBLE, SHOULD_CONVERT_DEBEZIUM_TIMESTAMP_TO_INTEGER).convertRecord(kafkaConnectRecord, KafkaSchemaRecordType.KEY);
+    assertEquals(bigQueryExpectedRecord, bigQueryTestRecord);
+
+    // Test-3
+    timeStampFieldName = "DebeziumTimestamp";
+    fieldValue = 1611854944000L;
+
+    // The flag declared at the top of this class defaults to false; enable the
+    // integer conversion for this case.
+    SHOULD_CONVERT_DEBEZIUM_TIMESTAMP_TO_INTEGER = true;
+
+    bigQueryExpectedRecord = new HashMap<>();
+    bigQueryExpectedRecord.put(timeStampFieldName, 1611854944000L);
+
+    kafkaConnectSchema = SchemaBuilder
+        .struct()
+        .field(timeStampFieldName, io.debezium.time.Timestamp.schema())
+        .build();
+
+    kafkaConnectStruct = new Struct(kafkaConnectSchema);
+    kafkaConnectStruct.put(timeStampFieldName, fieldValue);
+    kafkaConnectRecord = spoofSinkRecord(kafkaConnectSchema, kafkaConnectStruct, true);
+
+    bigQueryTestRecord =
+        new BigQueryRecordConverter(SHOULD_CONVERT_DOUBLE, SHOULD_CONVERT_DEBEZIUM_TIMESTAMP_TO_INTEGER).convertRecord(kafkaConnectRecord, KafkaSchemaRecordType.KEY);
+
+    // Reset the static flag to its default so it does not leak into tests that run later.
+    SHOULD_CONVERT_DEBEZIUM_TIMESTAMP_TO_INTEGER = false;
     assertEquals(bigQueryExpectedRecord, bigQueryTestRecord);
   }

@@ -558,7 +604,7 @@ public void testKafkaLogicalType() {
     SinkRecord kafkaConnectRecord = spoofSinkRecord(kafkaConnectSchema, kafkaConnectStruct, false);

     Map bigQueryTestRecord =
-        new BigQueryRecordConverter(SHOULD_CONVERT_DOUBLE).convertRecord(kafkaConnectRecord, KafkaSchemaRecordType.VALUE);
+        new BigQueryRecordConverter(SHOULD_CONVERT_DOUBLE, SHOULD_CONVERT_DEBEZIUM_TIMESTAMP_TO_INTEGER).convertRecord(kafkaConnectRecord, KafkaSchemaRecordType.VALUE);
     assertEquals(bigQueryExpectedRecord, bigQueryTestRecord);
   }

@@ -585,7 +631,7 @@ public void testNullable() {
     SinkRecord kafkaConnectRecord = spoofSinkRecord(kafkaConnectSchema, kafkaConnectStruct, true);

     Map bigQueryTestRecord =
-        new BigQueryRecordConverter(SHOULD_CONVERT_DOUBLE).convertRecord(kafkaConnectRecord, KafkaSchemaRecordType.KEY);
+        new BigQueryRecordConverter(SHOULD_CONVERT_DOUBLE, SHOULD_CONVERT_DEBEZIUM_TIMESTAMP_TO_INTEGER).convertRecord(kafkaConnectRecord, KafkaSchemaRecordType.KEY);
assertEquals(bigQueryExpectedRecord, bigQueryTestRecord); } @@ -608,7 +654,7 @@ public void testNullableStruct() { SinkRecord kafkaConnectRecord = spoofSinkRecord(kafkaConnectSchema, kafkaConnectStruct, false); Map bigQueryTestRecord = - new BigQueryRecordConverter(SHOULD_CONVERT_DOUBLE).convertRecord(kafkaConnectRecord, KafkaSchemaRecordType.VALUE); + new BigQueryRecordConverter(SHOULD_CONVERT_DOUBLE, SHOULD_CONVERT_DEBEZIUM_TIMESTAMP_TO_INTEGER).convertRecord(kafkaConnectRecord, KafkaSchemaRecordType.VALUE); assertEquals(bigQueryExpectedRecord, bigQueryTestRecord); } @@ -630,7 +676,7 @@ public void testValidMapSchemaless() { SinkRecord kafkaConnectRecord = spoofSinkRecord(null, kafkaConnectMap, true); Map convertedMap = - new BigQueryRecordConverter(SHOULD_CONVERT_DOUBLE).convertRecord(kafkaConnectRecord, KafkaSchemaRecordType.KEY); + new BigQueryRecordConverter(SHOULD_CONVERT_DOUBLE, SHOULD_CONVERT_DEBEZIUM_TIMESTAMP_TO_INTEGER).convertRecord(kafkaConnectRecord, KafkaSchemaRecordType.KEY); assertEquals(kafkaConnectMap, convertedMap); } @@ -652,7 +698,7 @@ public void testInvalidMapSchemaless() { SinkRecord kafkaConnectRecord = spoofSinkRecord(null, kafkaConnectMap, false); Map convertedMap = - new BigQueryRecordConverter(SHOULD_CONVERT_DOUBLE).convertRecord(kafkaConnectRecord, KafkaSchemaRecordType.VALUE); + new BigQueryRecordConverter(SHOULD_CONVERT_DOUBLE, SHOULD_CONVERT_DEBEZIUM_TIMESTAMP_TO_INTEGER).convertRecord(kafkaConnectRecord, KafkaSchemaRecordType.VALUE); } @Test @@ -664,7 +710,7 @@ public void testInvalidMapSchemalessNullValue() { }}; SinkRecord kafkaConnectRecord = spoofSinkRecord(null, kafkaConnectMap, true); - Map stringObjectMap = new BigQueryRecordConverter(SHOULD_CONVERT_DOUBLE).convertRecord(kafkaConnectRecord, KafkaSchemaRecordType.KEY); + Map stringObjectMap = new BigQueryRecordConverter(SHOULD_CONVERT_DOUBLE, SHOULD_CONVERT_DEBEZIUM_TIMESTAMP_TO_INTEGER).convertRecord(kafkaConnectRecord, KafkaSchemaRecordType.KEY); Assert.assertEquals(kafkaConnectMap, stringObjectMap ); } @@ -681,7 +727,7 @@ public void testInvalidMapSchemalessNestedMapNullValue() { }}; SinkRecord kafkaConnectRecord = spoofSinkRecord(null, kafkaConnectMap, true); - Map stringObjectMap = new BigQueryRecordConverter(SHOULD_CONVERT_DOUBLE) + Map stringObjectMap = new BigQueryRecordConverter(SHOULD_CONVERT_DOUBLE, SHOULD_CONVERT_DEBEZIUM_TIMESTAMP_TO_INTEGER) .convertRecord(kafkaConnectRecord, KafkaSchemaRecordType.KEY); Assert.assertEquals(kafkaConnectMap, stringObjectMap); } @@ -704,7 +750,7 @@ public void testMapSchemalessConvertDouble() { SinkRecord kafkaConnectRecord = spoofSinkRecord(null, kafkaConnectMap, true); Map convertedMap = - new BigQueryRecordConverter(SHOULD_CONVERT_DOUBLE).convertRecord(kafkaConnectRecord, KafkaSchemaRecordType.KEY); + new BigQueryRecordConverter(SHOULD_CONVERT_DOUBLE, SHOULD_CONVERT_DEBEZIUM_TIMESTAMP_TO_INTEGER).convertRecord(kafkaConnectRecord, KafkaSchemaRecordType.KEY); assertEquals(convertedMap.get("f1"), Double.MAX_VALUE); assertEquals(((Map)(convertedMap.get("f3"))).get("f4"), Double.MAX_VALUE); assertEquals(((ArrayList)((Map)(convertedMap.get("f3"))).get("f6")).get(1), Double.MAX_VALUE); @@ -730,7 +776,7 @@ public void testMapSchemalessConvertBytes() { SinkRecord kafkaConnectRecord = spoofSinkRecord(null, kafkaConnectMap, false); Map convertedMap = - new BigQueryRecordConverter(SHOULD_CONVERT_DOUBLE).convertRecord(kafkaConnectRecord, KafkaSchemaRecordType.VALUE); + new BigQueryRecordConverter(SHOULD_CONVERT_DOUBLE, 
SHOULD_CONVERT_DEBEZIUM_TIMESTAMP_TO_INTEGER).convertRecord(kafkaConnectRecord, KafkaSchemaRecordType.VALUE); assertEquals(convertedMap.get("f1"), Base64.getEncoder().encodeToString(helloWorld)); assertEquals(((Map)(convertedMap.get("f3"))).get("f4"), Base64.getEncoder().encodeToString(helloWorld)); } diff --git a/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/convert/BigQuerySchemaConverterTest.java b/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/convert/BigQuerySchemaConverterTest.java index 70c34e9a5..9185fb724 100644 --- a/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/convert/BigQuerySchemaConverterTest.java +++ b/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/convert/BigQuerySchemaConverterTest.java @@ -1,7 +1,7 @@ -package com.wepay.kafka.connect.bigquery.convert; - /* - * Copyright 2016 WePay, Inc. + * Copyright 2020 Confluent, Inc. + * + * This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,12 +17,15 @@ * under the License. */ +package com.wepay.kafka.connect.bigquery.convert; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertThrows; import com.google.cloud.bigquery.Field; import com.google.cloud.bigquery.LegacySQLTypeName; +import com.google.common.collect.ImmutableList; import com.wepay.kafka.connect.bigquery.exception.ConversionConnectException; import org.apache.kafka.connect.data.Date; @@ -33,6 +36,8 @@ import org.junit.Test; +import io.confluent.connect.avro.AvroData; + public class BigQuerySchemaConverterTest { @Test(expected = ConversionConnectException.class) @@ -627,6 +632,46 @@ public void testAllFieldsNullable() { com.google.cloud.bigquery.Schema bigQueryTestSchema = new BigQuerySchemaConverter(true).convertSchema(kafkaConnectTestSchema); assertEquals(bigQueryExpectedSchema, bigQueryTestSchema); + } + @Test + public void testSimpleRecursiveSchemaThrows() { + final String fieldName = "RecursiveField"; + + // Construct Avro schema with recursion since we cannot directly construct Connect schema with cycle + org.apache.avro.Schema recursiveAvroSchema = org.apache.avro.SchemaBuilder + .record("RecursiveItem") + .namespace("com.example") + .fields() + .name(fieldName) + .type().unionOf().nullType().and().type("RecursiveItem").endUnion() + .nullDefault() + .endRecord(); + + Schema connectSchema = new AvroData(100).toConnectSchema(recursiveAvroSchema); + ConversionConnectException e = assertThrows(ConversionConnectException.class, () -> + new BigQuerySchemaConverter(true).convertSchema(connectSchema)); + assertEquals("Kafka Connect schema contains cycle", e.getMessage()); + } + + @Test + public void testComplexRecursiveSchemaThrows() { + final String fieldName = "RecursiveField"; + + // Construct Avro schema with recursion since we cannot directly construct Connect schema with cycle + org.apache.avro.Schema recursiveAvroSchema = org.apache.avro.SchemaBuilder + .record("RecursiveItem") + .namespace("com.example") + .fields() + .name(fieldName) + .type() + .array().items() + .map().values().type("RecursiveItem").noDefault() + .endRecord(); + + Schema connectSchema = new AvroData(100).toConnectSchema(recursiveAvroSchema); + ConversionConnectException e = assertThrows(ConversionConnectException.class, () -> + new BigQuerySchemaConverter(true).convertSchema(connectSchema)); + assertEquals("Kafka Connect 
schema contains cycle", e.getMessage()); } } diff --git a/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/convert/KafkaDataConverterTest.java b/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/convert/KafkaDataConverterTest.java index b2eefb22d..5833c7070 100644 --- a/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/convert/KafkaDataConverterTest.java +++ b/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/convert/KafkaDataConverterTest.java @@ -1,3 +1,22 @@ +/* + * Copyright 2020 Confluent, Inc. + * + * This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + package com.wepay.kafka.connect.bigquery.convert; diff --git a/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/convert/logicaltype/DebeziumLogicalConvertersTest.java b/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/convert/logicaltype/DebeziumLogicalConvertersTest.java index 91a5c0080..81e27a810 100644 --- a/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/convert/logicaltype/DebeziumLogicalConvertersTest.java +++ b/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/convert/logicaltype/DebeziumLogicalConvertersTest.java @@ -1,7 +1,7 @@ -package com.wepay.kafka.connect.bigquery.convert.logicaltype; - /* - * Copyright 2016 WePay, Inc. + * Copyright 2020 Confluent, Inc. + * + * This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,6 +17,7 @@ * under the License. */ +package com.wepay.kafka.connect.bigquery.convert.logicaltype; import static org.junit.Assert.assertEquals; import static org.junit.Assert.fail; @@ -39,6 +40,7 @@ public class DebeziumLogicalConvertersTest { //corresponds to March 1 2017, 22:20:38.808(123) UTC // (March 1 2017, 14:20:38.808(123)-8:00) private static final Integer DAYS_TIMESTAMP = 17226; + private static final Integer MILLI_TIMESTAMP_INT = 1488406838; private static final Long MILLI_TIMESTAMP = 1488406838808L; private static final Long MICRO_TIMESTAMP = 1488406838808123L; @@ -60,6 +62,13 @@ public void testDateConversion() { @Test public void testMicroTimeConversion() { + testMicroTimeConversionHelper(MICRO_TIMESTAMP, "22:20:38.808123"); + // Test case where microseconds have a leading 0. 
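+    // 1,592,511,382,050,720 microseconds past the epoch is 2020-06-18T20:16:22.050720 UTC,
+    // so the fractional ".050720" exercises zero-padding in the formatted output.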
+    long microTimestamp = 1592511382050720L;
+    testMicroTimeConversionHelper(microTimestamp, "20:16:22.050720");
+  }
+
+  private void testMicroTimeConversionHelper(long microTimestamp, String expectedTime) {
     MicroTimeConverter converter = new MicroTimeConverter();

     assertEquals(LegacySQLTypeName.TIME, converter.getBQSchemaType());

     try {
       converter.checkEncodingType(Schema.Type.INT64);
     } catch (Exception e) {
       fail("Expected encoding type check to succeed.");
     }
@@ -70,12 +79,20 @@ public void testMicroTimeConversion() {
       fail("Expected encoding type check to succeed.");
     }

-    String formattedMicroTime = converter.convert(MICRO_TIMESTAMP);
-    assertEquals("22:20:38.808123", formattedMicroTime);
+    String formattedMicroTime = converter.convert(microTimestamp);
+    assertEquals(expectedTime, formattedMicroTime);
   }

+  @Test
   public void testMicroTimestampConversion() {
+    testMicroTimestampConversionHelper(MICRO_TIMESTAMP, "2017-03-01 22:20:38.808123");
+    // Test a timestamp where the microseconds have a leading 0.
+    Long timestamp = 1592511382050720L;
+    testMicroTimestampConversionHelper(timestamp, "2020-06-18 20:16:22.050720");
+  }
+
+  private void testMicroTimestampConversionHelper(Long timestamp, String expectedTimestamp) {
     MicroTimestampConverter converter = new MicroTimestampConverter();

     assertEquals(LegacySQLTypeName.TIMESTAMP, converter.getBQSchemaType());

     try {
       converter.checkEncodingType(Schema.Type.INT64);
     } catch (Exception e) {
       fail("Expected encoding type check to succeed.");
     }

-    String formattedMicroTimestamp = converter.convert(MICRO_TIMESTAMP);
-    assertEquals("2017-03-01 22:20:38.808123", formattedMicroTimestamp);
+    String formattedMicroTimestamp = converter.convert(timestamp);
+    assertEquals(expectedTimestamp, formattedMicroTimestamp);
   }

   @Test
@@ -102,8 +119,8 @@ public void testTimeConversion() {
       fail("Expected encoding type check to succeed.");
     }

-    String formattedTime = converter.convert(MILLI_TIMESTAMP);
-    assertEquals("22:20:38.808", formattedTime);
+    // 1,488,406,838 ms is 17 days plus 05:26:46.838; only the time of day survives formatting.
+    String formattedTime = converter.convert(MILLI_TIMESTAMP_INT);
+    assertEquals("05:26:46.838", formattedTime);
   }

   @Test
diff --git a/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/convert/logicaltype/KafkaLogicalConvertersTest.java b/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/convert/logicaltype/KafkaLogicalConvertersTest.java
index c40a73221..5eb72902f 100644
--- a/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/convert/logicaltype/KafkaLogicalConvertersTest.java
+++ b/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/convert/logicaltype/KafkaLogicalConvertersTest.java
@@ -1,7 +1,7 @@
-package com.wepay.kafka.connect.bigquery.convert.logicaltype;
-
 /*
- * Copyright 2016 WePay, Inc.
+ * Copyright 2020 Confluent, Inc.
+ *
+ * This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -17,6 +17,7 @@
  * under the License.
 */
+package com.wepay.kafka.connect.bigquery.convert.logicaltype;

 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.fail;
@@ -26,6 +27,7 @@
 import com.wepay.kafka.connect.bigquery.convert.logicaltype.KafkaLogicalConverters.DateConverter;
 import com.wepay.kafka.connect.bigquery.convert.logicaltype.KafkaLogicalConverters.DecimalConverter;
 import com.wepay.kafka.connect.bigquery.convert.logicaltype.KafkaLogicalConverters.TimestampConverter;
+import com.wepay.kafka.connect.bigquery.convert.logicaltype.KafkaLogicalConverters.TimeConverter;

 import org.apache.kafka.connect.data.Schema;

@@ -100,4 +102,32 @@ public void testTimestampConversion() {

     assertEquals("2017-03-01 22:20:38.808", formattedTimestamp);
   }
+
+
+  @Test
+  public void testTimeConversion() {
+    TimeConverter converter = new KafkaLogicalConverters.TimeConverter();
+
+    assertEquals(LegacySQLTypeName.TIME, converter.getBQSchemaType());
+
+    try {
+      converter.checkEncodingType(Schema.Type.INT32);
+    } catch (Exception ex) {
+      fail("Expected encoding type check to succeed.");
+    }
+
+    try {
+      converter.checkEncodingType(Schema.Type.INT64);
+      fail("Expected encoding type check to fail.");
+    } catch (Exception ex) {
+      // continue
+    }
+
+    // Can't reuse the timestamp from the other tests: Kafka Connect Time values carry
+    // only a time of day and must fall on January 1st, 1970.
+    Date date = new Date(80438808); // 22:20:38.808 UTC on January 1st, 1970
+    String formattedTimestamp = converter.convert(date);
+
+    assertEquals("22:20:38.808", formattedTimestamp);
+  }
 }
diff --git a/kcbq-connector/src/integration-test/java/com/wepay/kafka/connect/bigquery/it/BigQueryConnectorIntegrationTest.java b/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/it/BigQueryConnectorIntegrationTest.java
similarity index 94%
rename from kcbq-connector/src/integration-test/java/com/wepay/kafka/connect/bigquery/it/BigQueryConnectorIntegrationTest.java
rename to kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/it/BigQueryConnectorIntegrationTest.java
index 8a6c6505d..a3287d112 100644
--- a/kcbq-connector/src/integration-test/java/com/wepay/kafka/connect/bigquery/it/BigQueryConnectorIntegrationTest.java
+++ b/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/it/BigQueryConnectorIntegrationTest.java
@@ -1,7 +1,7 @@
-package com.wepay.kafka.connect.bigquery.it;
-
 /*
- * Copyright 2016 WePay, Inc.
+ * Copyright 2020 Confluent, Inc.
+ *
+ * This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -17,16 +17,7 @@
  * under the License.
*/ - -import static com.google.cloud.bigquery.LegacySQLTypeName.BOOLEAN; -import static com.google.cloud.bigquery.LegacySQLTypeName.BYTES; -import static com.google.cloud.bigquery.LegacySQLTypeName.DATE; -import static com.google.cloud.bigquery.LegacySQLTypeName.FLOAT; -import static com.google.cloud.bigquery.LegacySQLTypeName.INTEGER; -import static com.google.cloud.bigquery.LegacySQLTypeName.STRING; -import static com.google.cloud.bigquery.LegacySQLTypeName.TIMESTAMP; - -import static org.junit.Assert.assertEquals; +package com.wepay.kafka.connect.bigquery.it; import com.google.cloud.bigquery.BigQuery; import com.google.cloud.bigquery.Field; @@ -35,16 +26,13 @@ import com.google.cloud.bigquery.Schema; import com.google.cloud.bigquery.Table; import com.google.cloud.bigquery.TableResult; - -import com.wepay.kafka.connect.bigquery.BigQueryHelper; -import com.wepay.kafka.connect.bigquery.exception.SinkConfigConnectException; - +import com.wepay.kafka.connect.bigquery.GcpClientBuilder; +import org.apache.kafka.common.config.ConfigException; import org.junit.BeforeClass; import org.junit.Test; import java.io.FileNotFoundException; import java.io.InputStream; - import java.time.LocalDate; import java.time.ZoneOffset; import java.time.format.DateTimeFormatter; @@ -53,17 +41,24 @@ import java.util.List; import java.util.Properties; +import static com.google.cloud.bigquery.LegacySQLTypeName.BOOLEAN; +import static com.google.cloud.bigquery.LegacySQLTypeName.BYTES; +import static com.google.cloud.bigquery.LegacySQLTypeName.DATE; +import static com.google.cloud.bigquery.LegacySQLTypeName.FLOAT; +import static com.google.cloud.bigquery.LegacySQLTypeName.INTEGER; +import static com.google.cloud.bigquery.LegacySQLTypeName.STRING; +import static com.google.cloud.bigquery.LegacySQLTypeName.TIMESTAMP; +import static org.junit.Assert.assertEquals; + public class BigQueryConnectorIntegrationTest { public static final String TEST_PROPERTIES_FILENAME = "/test.properties"; public static final String KEYFILE_PROPERTY = "keyfile"; public static final String PROJECT_PROPERTY = "project"; public static final String DATASET_PROPERTY = "dataset"; - public static final String KEY_SOURCE_PROPERTY = "keySource"; - private static String keyfile; + private static String key; private static String project; private static String dataset; - private static String keySource; private static BigQuery bigQuery; @@ -86,9 +81,9 @@ private static void initializeTestProperties() throws Exception { Properties properties = new Properties(); properties.load(propertiesFile); - keyfile = properties.getProperty(KEYFILE_PROPERTY); - if (keyfile == null) { - throw new SinkConfigConnectException( + key = properties.getProperty(KEYFILE_PROPERTY); + if (key == null) { + throw new ConfigException( "'" + KEYFILE_PROPERTY + "' property must be specified in test properties file" ); @@ -96,7 +91,7 @@ private static void initializeTestProperties() throws Exception { project = properties.getProperty(PROJECT_PROPERTY); if (project == null) { - throw new SinkConfigConnectException( + throw new ConfigException( "'" + PROJECT_PROPERTY + "' property must be specified in test properties file" ); @@ -104,18 +99,20 @@ private static void initializeTestProperties() throws Exception { dataset = properties.getProperty(DATASET_PROPERTY); if (dataset == null) { - throw new SinkConfigConnectException( + throw new ConfigException( "'" + DATASET_PROPERTY + "' property must be specified in test properties file" ); } - - keySource = 
properties.getProperty(KEY_SOURCE_PROPERTY);
     }
   }

-  private static void initializeBigQuery() throws Exception {
-    bigQuery = new BigQueryHelper().setKeySource(keySource).connect(project, keyfile);
+  private static void initializeBigQuery() {
+    bigQuery = new GcpClientBuilder.BigQueryBuilder()
+        .withKeySource(GcpClientBuilder.KeySource.FILE)
+        .withKey(key)
+        .withProject(project)
+        .build();
   }

   private static List<Byte> boxByteArray(byte[] bytes) {
diff --git a/kcbq-connector/src/integration-test/java/com/wepay/kafka/connect/bigquery/it/utils/BucketClearer.java b/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/it/utils/BucketClearer.java
similarity index 73%
rename from kcbq-connector/src/integration-test/java/com/wepay/kafka/connect/bigquery/it/utils/BucketClearer.java
rename to kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/it/utils/BucketClearer.java
index b5bcb4317..845be68cc 100644
--- a/kcbq-connector/src/integration-test/java/com/wepay/kafka/connect/bigquery/it/utils/BucketClearer.java
+++ b/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/it/utils/BucketClearer.java
@@ -1,7 +1,7 @@
-package com.wepay.kafka.connect.bigquery.it.utils;
-
 /*
- * Copyright 2016 WePay, Inc.
+ * Copyright 2020 Confluent, Inc.
+ *
+ * This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -17,30 +17,29 @@
  * under the License.
  */
-
-import com.google.cloud.storage.Bucket;
+package com.wepay.kafka.connect.bigquery.it.utils;

 import com.google.cloud.storage.Storage;
-import com.wepay.kafka.connect.bigquery.GCSBuilder;
+import com.wepay.kafka.connect.bigquery.GcpClientBuilder;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

 public class BucketClearer {

   private static final Logger logger = LoggerFactory.getLogger(BucketClearer.class);
-  private static String keySource;

   /**
-   * Clears tables in the given project and dataset, using a provided JSON service account key.
+   * Clears the given GCS bucket in the given project, using a provided JSON service account key.
    */
   public static void main(String[] args) {
-    if (args.length < 4) {
+    if (args.length != 3) {
       usage();
     }
-    if (args.length == 4) {
-      keySource = args[3];
-    }
-    Storage gcs = new GCSBuilder(args[1]).setKey(args[0]).setKeySource(keySource).build();
+    Storage gcs = new GcpClientBuilder.GcsBuilder()
+        .withKeySource(GcpClientBuilder.KeySource.FILE)
+        .withKey(args[0])
+        .withProject(args[1])
+        .build();

     // if bucket exists, delete it.
     String bucketName = args[2];
@@ -53,7 +52,7 @@ public static void main(String[] args) {
   private static void usage() {
     System.err.println(
-        "usage: BucketClearer <key file> <project name> <bucket name> <key source>"
+        "usage: BucketClearer <key file> <project name> <bucket name>"
     );
     System.exit(1);
   }
diff --git a/kcbq-connector/src/integration-test/java/com/wepay/kafka/connect/bigquery/it/utils/TableClearer.java b/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/it/utils/TableClearer.java
similarity index 78%
rename from kcbq-connector/src/integration-test/java/com/wepay/kafka/connect/bigquery/it/utils/TableClearer.java
rename to kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/it/utils/TableClearer.java
index 32ad48371..c66606eae 100644
--- a/kcbq-connector/src/integration-test/java/com/wepay/kafka/connect/bigquery/it/utils/TableClearer.java
+++ b/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/it/utils/TableClearer.java
@@ -1,7 +1,7 @@
-package com.wepay.kafka.connect.bigquery.it.utils;
-
 /*
- * Copyright 2016 WePay, Inc.
+ * Copyright 2020 Confluent, Inc.
+ *
+ * This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -17,34 +17,35 @@
  * under the License.
  */
+package com.wepay.kafka.connect.bigquery.it.utils;

 import com.google.cloud.bigquery.BigQuery;
-import com.wepay.kafka.connect.bigquery.BigQueryHelper;
+import com.wepay.kafka.connect.bigquery.GcpClientBuilder;
 import com.wepay.kafka.connect.bigquery.utils.FieldNameSanitizer;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

 public class TableClearer {
-  private static final Logger logger = LoggerFactory.getLogger(TableClearer.class);
-  private static String keySource;
+  private static final Logger logger = LoggerFactory.getLogger(TableClearer.class);

   /**
    * Clears tables in the given project and dataset, using a provided JSON service account key.
    */
   public static void main(String[] args) {
-    if (args.length < 5) {
+    if (args.length < 4) {
       usage();
     }
-    int tablesStart = 3;
-    if (args.length == 5) {
-      keySource = args[3];
-      tablesStart = 4;
-    }
-    BigQuery bigQuery = new BigQueryHelper().setKeySource(keySource).connect(args[1], args[0]);
-    for (int i = tablesStart; i < args.length; i++) {
+
+    BigQuery bigQuery = new GcpClientBuilder.BigQueryBuilder()
+        .withKeySource(GcpClientBuilder.KeySource.FILE)
+        .withKey(args[0])
+        .withProject(args[1])
+        .build();
+
+    for (int i = 3; i < args.length; i++) {
      // Consider using the sanitizeTopics property value here in the future to decide whether
      // table names should be sanitized. For now the test cases always run with sanitizeTopics
      // set to true, so sanitize each table name before deleting it; otherwise the tests become flaky.
@@ -59,7 +60,7 @@ public static void main(String[] args) {
   private static void usage() {
     System.err.println(
-        "usage: TableClearer <key file> <project name> <dataset name> <key source> <table> [<table> ...]"
+        "usage: TableClearer <key file> <project name> <dataset name> <table> [<table> ...]"
     );
     System.exit(1);
   }
diff --git a/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/retrieve/MemorySchemaRetrieverTest.java b/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/retrieve/MemorySchemaRetrieverTest.java
index 6148ada45..e564dea35 100644
--- a/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/retrieve/MemorySchemaRetrieverTest.java
+++ b/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/retrieve/MemorySchemaRetrieverTest.java
@@ -1,5 +1,23 @@
-package com.wepay.kafka.connect.bigquery.retrieve;
+/*
+ * Copyright 2020 Confluent, Inc.
+ *
+ * This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package com.wepay.kafka.connect.bigquery.retrieve;

 import com.google.cloud.bigquery.TableId;

@@ -14,7 +32,6 @@
 import java.util.HashMap;

-
 public class MemorySchemaRetrieverTest {
   public TableId getTableId(String datasetName, String tableName) {
     return TableId.of(datasetName, tableName);
diff --git a/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/utils/FieldNameSanitizerTest.java b/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/utils/FieldNameSanitizerTest.java
index 3358c1386..70d0d508b 100644
--- a/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/utils/FieldNameSanitizerTest.java
+++ b/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/utils/FieldNameSanitizerTest.java
@@ -1,5 +1,25 @@
+/*
+ * Copyright 2020 Confluent, Inc.
+ *
+ * This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 package com.wepay.kafka.connect.bigquery.utils;

+import java.util.Collections;
 import java.util.Map;
 import org.junit.Before;
 import org.junit.Test;
@@ -75,4 +95,26 @@ public void testInvalidSymbol() {
     // Validate map size.
     assertEquals(5, sanitizedMap.size());
   }
+
+  /**
+   * Verifies that null values are acceptable while sanitizing keys.
+ */ + @Test + public void testNullValue() { + assertEquals( + Collections.singletonMap("abc", null), + FieldNameSanitizer.replaceInvalidKeys(Collections.singletonMap("abc", null))); + } + + @Test + public void testDeeplyNestedNullValues() { + testMap = new HashMap<>(); + testMap.put("top", null); + testMap.put("middle", Collections.singletonMap("key", null)); + testMap.put("bottom", Collections.singletonMap("key", Collections.singletonMap("key", null))); + assertEquals( + testMap, + FieldNameSanitizer.replaceInvalidKeys(testMap) + ); + } } diff --git a/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/utils/PartitionedTableIdTest.java b/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/utils/PartitionedTableIdTest.java index 1737bfbdf..d24072714 100644 --- a/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/utils/PartitionedTableIdTest.java +++ b/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/utils/PartitionedTableIdTest.java @@ -1,7 +1,7 @@ -package com.wepay.kafka.connect.bigquery.utils; - /* - * Copyright 2016 WePay, Inc. + * Copyright 2020 Confluent, Inc. + * + * This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,6 +17,7 @@ * under the License. */ +package com.wepay.kafka.connect.bigquery.utils; import com.google.cloud.bigquery.TableId; diff --git a/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/utils/TopicToTableResolverTest.java b/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/utils/TopicToTableResolverTest.java index 3e41cc66f..53348a7ba 100644 --- a/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/utils/TopicToTableResolverTest.java +++ b/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/utils/TopicToTableResolverTest.java @@ -1,7 +1,7 @@ -package com.wepay.kafka.connect.bigquery.utils; - /* - * Copyright 2016 WePay, Inc. + * Copyright 2020 Confluent, Inc. + * + * This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,6 +17,8 @@ * under the License. */ +package com.wepay.kafka.connect.bigquery.utils; + import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; diff --git a/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/write/row/BigQueryWriterTest.java b/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/write/row/BigQueryWriterTest.java index 7c0e871e5..d55f377cb 100644 --- a/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/write/row/BigQueryWriterTest.java +++ b/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/write/row/BigQueryWriterTest.java @@ -1,7 +1,7 @@ -package com.wepay.kafka.connect.bigquery.write.row; - /* - * Copyright 2016 WePay, Inc. + * Copyright 2020 Confluent, Inc. + * + * This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,8 +17,10 @@ * under the License. 
 */
+package com.wepay.kafka.connect.bigquery.write.row;

 import static org.junit.Assert.assertEquals;
+import static org.mockito.ArgumentMatchers.any;
 import static org.mockito.Matchers.anyObject;
 import static org.mockito.Mockito.mock;
 import static org.mockito.Mockito.times;
@@ -30,16 +32,17 @@
 import com.google.cloud.bigquery.BigQueryException;
 import com.google.cloud.bigquery.InsertAllRequest;
 import com.google.cloud.bigquery.InsertAllResponse;
+import com.google.cloud.bigquery.Table;
 import com.google.cloud.storage.Storage;

 import com.wepay.kafka.connect.bigquery.BigQuerySinkTask;
 import com.wepay.kafka.connect.bigquery.SchemaManager;
-import com.wepay.kafka.connect.bigquery.SinkTaskPropertiesFactory;
+import com.wepay.kafka.connect.bigquery.SinkPropertiesFactory;
 import com.wepay.kafka.connect.bigquery.api.SchemaRetriever;
 import com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig;
-import com.wepay.kafka.connect.bigquery.config.BigQuerySinkTaskConfig;
 import com.wepay.kafka.connect.bigquery.exception.BigQueryConnectException;
+import com.wepay.kafka.connect.bigquery.retrieve.MemorySchemaRetriever;

 import org.apache.kafka.connect.data.Schema;
 import org.apache.kafka.connect.data.SchemaBuilder;
 import org.apache.kafka.connect.data.Struct;
@@ -60,11 +63,11 @@
 @SuppressWarnings("unchecked")
 public class BigQueryWriterTest {

-  private static SinkTaskPropertiesFactory propertiesFactory;
+  private static SinkPropertiesFactory propertiesFactory;

   @BeforeClass
   public static void initializePropertiesFactory() {
-    propertiesFactory = new SinkTaskPropertiesFactory();
+    propertiesFactory = new SinkPropertiesFactory();
   }

   @Test
@@ -106,7 +109,8 @@ public void testAutoCreateTables() {
     final String topic = "test_topic";
     final String dataset = "scratch";
     final Map<String, String> properties = makeProperties("3", "2000", topic, dataset);
-    properties.put(BigQuerySinkTaskConfig.TABLE_CREATE_CONFIG, "true");
+    properties.put(BigQuerySinkConfig.TABLE_CREATE_CONFIG, "true");
+    properties.put(BigQuerySinkConfig.SCHEMA_RETRIEVER_CONFIG, MemorySchemaRetriever.class.getName());

     BigQuery bigQuery = mock(BigQuery.class);
     Map<Long, List<BigQueryError>> emptyMap = mock(Map.class);
@@ -116,8 +120,7 @@
     when(insertAllResponse.hasErrors()).thenReturn(false);
     when(insertAllResponse.getInsertErrors()).thenReturn(emptyMap);

-    BigQueryException missTableException = mock(BigQueryException.class);
-    when(missTableException.getCode()).thenReturn(404);
+    BigQueryException missTableException = new BigQueryException(404, "Table is missing");

     when(bigQuery.insertAll(anyObject())).thenThrow(missTableException).thenReturn(insertAllResponse);

@@ -137,13 +140,15 @@
     verify(bigQuery, times(2)).insertAll(anyObject());
   }

-  @Test
+  @Test(expected = BigQueryConnectException.class)
   public void testNonAutoCreateTables() {
     final String topic = "test_topic";
     final String dataset = "scratch";
     final Map<String, String> properties = makeProperties("3", "2000", topic, dataset);

     BigQuery bigQuery = mock(BigQuery.class);
+    Table mockTable = mock(Table.class);
+    when(bigQuery.getTable(any())).thenReturn(mockTable);

     Map<Long, List<BigQueryError>> emptyMap = mock(Map.class);
     when(emptyMap.isEmpty()).thenReturn(true);
     InsertAllResponse insertAllResponse = mock(InsertAllResponse.class);
     when(insertAllResponse.hasErrors()).thenReturn(false);
     when(insertAllResponse.getInsertErrors()).thenReturn(emptyMap);

-    BigQueryException missTableException = mock(BigQueryException.class);
-    when(missTableException.getCode()).thenReturn(404);
+    BigQueryException missTableException = new BigQueryException(404, "Table is missing");
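+
+    // The 404 from the first insertAll attempt must surface as a BigQueryConnectException,
+    // since this test runs with automatic table creation disabled (note the expected
+    // exception on the @Test annotation above).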
     when(bigQuery.insertAll(anyObject())).thenThrow(missTableException).thenReturn(insertAllResponse);

@@ -167,9 +171,6 @@
     testTask.put(
         Collections.singletonList(spoofSinkRecord(topic, 0, 0, "some_field", "some_value")));
     testTask.flush(Collections.emptyMap());
-
-    verify(schemaManager, times(0)).createTable(anyObject(), anyObject());
-    verify(bigQuery, times(2)).insertAll(anyObject());
   }

   @Test
@@ -285,8 +286,8 @@ private Map<String, String> makeProperties(String bigqueryRetry, String bigqueryRetryWait,
     String topic, String dataset) {
     Map<String, String> properties = propertiesFactory.getProperties();
-    properties.put(BigQuerySinkTaskConfig.BIGQUERY_RETRY_CONFIG, bigqueryRetry);
-    properties.put(BigQuerySinkTaskConfig.BIGQUERY_RETRY_WAIT_CONFIG, bigqueryRetryWait);
+    properties.put(BigQuerySinkConfig.BIGQUERY_RETRY_CONFIG, bigqueryRetry);
+    properties.put(BigQuerySinkConfig.BIGQUERY_RETRY_WAIT_CONFIG, bigqueryRetryWait);
     properties.put(BigQuerySinkConfig.TOPICS_CONFIG, topic);
     properties.put(BigQuerySinkConfig.DATASETS_CONFIG, String.format(".*=%s", dataset));
     return properties;
diff --git a/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/write/row/GCSToBQWriterTest.java b/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/write/row/GCSToBQWriterTest.java
index 34bff0f8a..ca1b4d55b 100644
--- a/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/write/row/GCSToBQWriterTest.java
+++ b/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/write/row/GCSToBQWriterTest.java
@@ -1,7 +1,7 @@
-package com.wepay.kafka.connect.bigquery.write.row;
-
 /*
- * Copyright 2019 WePay, Inc.
+ * Copyright 2020 Confluent, Inc.
+ *
+ * This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -17,19 +17,18 @@
  * under the License.
*/ +package com.wepay.kafka.connect.bigquery.write.row; import com.google.cloud.bigquery.BigQuery; import com.google.cloud.bigquery.Table; -import com.google.cloud.storage.Blob; import com.google.cloud.storage.BlobInfo; import com.google.cloud.storage.Storage; import com.google.cloud.storage.StorageException; import com.wepay.kafka.connect.bigquery.BigQuerySinkTask; import com.wepay.kafka.connect.bigquery.SchemaManager; -import com.wepay.kafka.connect.bigquery.SinkTaskPropertiesFactory; +import com.wepay.kafka.connect.bigquery.SinkPropertiesFactory; import com.wepay.kafka.connect.bigquery.api.SchemaRetriever; import com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig; -import com.wepay.kafka.connect.bigquery.config.BigQuerySinkTaskConfig; import org.apache.kafka.connect.data.Schema; import org.apache.kafka.connect.data.SchemaBuilder; import org.apache.kafka.connect.data.Struct; @@ -51,11 +50,11 @@ public class GCSToBQWriterTest { - private static SinkTaskPropertiesFactory propertiesFactory; + private static SinkPropertiesFactory propertiesFactory; @BeforeClass public static void initializePropertiesFactory() { - propertiesFactory = new SinkTaskPropertiesFactory(); + propertiesFactory = new SinkPropertiesFactory(); } @Test @@ -161,8 +160,8 @@ private Map makeProperties(String bigqueryRetry, String topic, String dataset) { Map properties = propertiesFactory.getProperties(); - properties.put(BigQuerySinkTaskConfig.BIGQUERY_RETRY_CONFIG, bigqueryRetry); - properties.put(BigQuerySinkTaskConfig.BIGQUERY_RETRY_WAIT_CONFIG, bigqueryRetryWait); + properties.put(BigQuerySinkConfig.BIGQUERY_RETRY_CONFIG, bigqueryRetry); + properties.put(BigQuerySinkConfig.BIGQUERY_RETRY_WAIT_CONFIG, bigqueryRetryWait); properties.put(BigQuerySinkConfig.TOPICS_CONFIG, topic); properties.put(BigQuerySinkConfig.DATASETS_CONFIG, String.format(".*=%s", dataset)); // gcs config diff --git a/kcbq-connector/src/test/resources/log4j.properties b/kcbq-connector/src/test/resources/log4j.properties new file mode 100644 index 000000000..94fb72b55 --- /dev/null +++ b/kcbq-connector/src/test/resources/log4j.properties @@ -0,0 +1,33 @@ +# +# Copyright 2020 Confluent, Inc. +# +# This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +log4j.rootLogger=INFO, stdout + +# Send the logs to the console. 
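+# stdout is the only appender defined in this file; the connectAppender pattern below
+# only takes effect when a Connect-supplied log4j configuration defines that appender.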
+#
+log4j.appender.stdout=org.apache.log4j.ConsoleAppender
+log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
+
+connect.log.pattern=[%d] %p %X{connector.context}%m (%c:%L)%n
+log4j.appender.stdout.layout.ConversionPattern=${connect.log.pattern}
+log4j.appender.connectAppender.layout.ConversionPattern=${connect.log.pattern}
+
+# These are used in the log4j properties file that ships by default with Connect
+log4j.logger.org.apache.zookeeper=ERROR
+log4j.logger.org.reflections=ERROR
diff --git a/kcbq-connector/test/docker/connect/Dockerfile b/kcbq-connector/test/docker/connect/Dockerfile
index 447e6e9a9..a198b4d16 100644
--- a/kcbq-connector/test/docker/connect/Dockerfile
+++ b/kcbq-connector/test/docker/connect/Dockerfile
@@ -1,4 +1,7 @@
-# Copyright 2016 WePay, Inc.
+#
+# Copyright 2020 Confluent, Inc.
+#
+# This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -13,17 +16,12 @@
 # specific language governing permissions and limitations
 # under the License.
 #
-# Builds a docker image for the Kafka-BigQuery Connector.
-# Expects links to "kafka" and "schema-registry" containers.
-#
-# Usage:
-#   docker build -t kcbq/connect connect
-#   docker run --name kcbq_test_connect \
-#     --link kcbq_test_kafka:kafka --link kcbq_test_schema-registry:schema-registry \
-#     kcbq/connect

 FROM confluentinc/cp-kafka-connect-base:4.1.2

+RUN ["apt", "update"]
+# The -y flag keeps the install non-interactive so the image build does not hang.
+RUN ["apt", "install", "-y", "unzip"]
+
 COPY connect-docker.sh /usr/local/bin/
 RUN ["chmod", "+x", "/usr/local/bin/connect-docker.sh"]
diff --git a/kcbq-connector/test/docker/connect/connect-docker.sh b/kcbq-connector/test/docker/connect/connect-docker.sh
index 65c2bd606..06ef2616b 100755
--- a/kcbq-connector/test/docker/connect/connect-docker.sh
+++ b/kcbq-connector/test/docker/connect/connect-docker.sh
@@ -1,5 +1,8 @@
 #! /usr/bin/env bash
-# Copyright 2016 WePay, Inc.
+#
+# Copyright 2020 Confluent, Inc.
+#
+# This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -13,8 +16,9 @@
 # KIND, either express or implied. See the License for the
 # specific language governing permissions and limitations
 # under the License.
+#

-tar -C /usr/local/share/kafka/plugins/kafka-connect-bigquery/ -xf /usr/local/share/kafka/plugins/kafka-connect-bigquery/kcbq.tar
+unzip -j -d /usr/local/share/kafka/plugins/kafka-connect-bigquery/ /usr/local/share/kafka/plugins/kafka-connect-bigquery/kcbq.zip 'wepay-kafka-connect-bigquery-*/lib/*.jar'

 connect-standalone \
     /etc/kafka-connect-bigquery/standalone.properties \
diff --git a/kcbq-connector/test/docker/populate/Dockerfile b/kcbq-connector/test/docker/populate/Dockerfile
index c91dcecf6..0fe1d5f15 100644
--- a/kcbq-connector/test/docker/populate/Dockerfile
+++ b/kcbq-connector/test/docker/populate/Dockerfile
@@ -1,4 +1,7 @@
-# Copyright 2016 WePay, Inc.
+#
+# Copyright 2020 Confluent, Inc.
+#
+# This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -13,14 +16,6 @@
 # specific language governing permissions and limitations
 # under the License.
# -# Populates Kafka and Schema Registry with test data -# Expects links to "kafka" and "schema-registry" containers. -# -# Usage: -# docker build -t kcbq/populate populate -# docker run --name kcbq_test_populate \ -# --link kcbq_test_kafka:kafka --link kcbq_test_schema-registry:schema-registry \ -# kcbq/populate FROM confluentinc/cp-schema-registry:4.1.2 diff --git a/kcbq-connector/test/docker/populate/populate-docker.sh b/kcbq-connector/test/docker/populate/populate-docker.sh index aed681140..00795acd6 100755 --- a/kcbq-connector/test/docker/populate/populate-docker.sh +++ b/kcbq-connector/test/docker/populate/populate-docker.sh @@ -1,5 +1,8 @@ #! /usr/bin/env bash -# Copyright 2016 WePay, Inc. +# +# Copyright 2020 Confluent, Inc. +# +# This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,6 +16,7 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. +# for schema_dir in /tmp/schemas/*; do kafka-avro-console-producer \ diff --git a/kcbq-connector/test/integrationtest.sh b/kcbq-connector/test/integrationtest.sh index a88c5518b..53bef22a1 100755 --- a/kcbq-connector/test/integrationtest.sh +++ b/kcbq-connector/test/integrationtest.sh @@ -1,5 +1,8 @@ #! /usr/bin/env bash -# Copyright 2016 WePay, Inc. +# +# Copyright 2020 Confluent, Inc. +# +# This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,11 +16,12 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. +# #################################################################################################### # Basic script setup -set -e +set -ex if [[ -t 1 ]]; then NORMAL="$(tput sgr0)" @@ -64,7 +68,6 @@ log() { msg "$BOLD" "$*"; } BASE_DIR=$(dirname "$0") -GRADLEW="$BASE_DIR/../../gradlew" #################################################################################################### # Configuration processing @@ -211,29 +214,26 @@ docker start -a "$POPULATE_DOCKER_NAME" # Deleting existing BigQuery tables/bucket warn 'Deleting existing BigQuery test tables and existing GCS bucket' - -test_tables= -test_topics= +unset TEST_TABLES +unset TEST_TOPICS for file in "$BASE_DIR"/resources/test_schemas/*; do - test_tables+="${test_tables:+ }kcbq_test_$(basename "${file/-/_}")" - test_topics+="${test_topics:+,}kcbq_test_$(basename "$file")" + TEST_TABLES+="${TEST_TABLES:+ }kcbq_test_$(basename "${file/-/_}")" + TEST_TOPICS+="${TEST_TOPICS:+,}kcbq_test_$(basename "$file")" done -"$GRADLEW" -p "$BASE_DIR/.." \ - -Pkcbq_test_keyfile="$KCBQ_TEST_KEYFILE" \ - -Pkcbq_test_project="$KCBQ_TEST_PROJECT" \ - -Pkcbq_test_dataset="$KCBQ_TEST_DATASET" \ - -Pkcbq_test_tables="$test_tables" \ - -Pkcbq_test_bucket="$KCBQ_TEST_BUCKET" \ - -Pkcbq_test_keysource="$KCBQ_TEST_KEYSOURCE" \ - integrationTestPrep +mvn -f "$BASE_DIR/.." clean test-compile +mvn -f "$BASE_DIR/.." exec:java -Dexec.mainClass=com.wepay.kafka.connect.bigquery.it.utils.TableClearer \ + -Dexec.classpathScope=test \ + -Dexec.args="${KCBQ_TEST_KEYFILE} ${KCBQ_TEST_PROJECT} ${KCBQ_TEST_DATASET} ${TEST_TABLES}" +mvn -f "$BASE_DIR/.." 
exec:java -Dexec.mainClass=com.wepay.kafka.connect.bigquery.it.utils.BucketClearer \ + -Dexec.classpathScope=test \ + -Dexec.args="${KCBQ_TEST_KEYFILE} ${KCBQ_TEST_PROJECT} ${KCBQ_TEST_BUCKET}" #################################################################################################### # Executing connector in standalone mode (this is the execution portion of the actual test) statusupdate 'Executing Kafka Connect in Docker' -# Run clean task to ensure there's only one connector tarball in the build/dist directory -"$GRADLEW" -q -p "$BASE_DIR/../.." clean distTar +mvn -f "$BASE_DIR/.." install -Dskip.unit.tests=true [[ ! -e "$DOCKER_DIR/connect/properties" ]] && mkdir "$DOCKER_DIR/connect/properties" RESOURCES_DIR="$BASE_DIR/resources" @@ -248,14 +248,14 @@ project=$KCBQ_TEST_PROJECT datasets=.*=$KCBQ_TEST_DATASET gcsBucketName=$KCBQ_TEST_BUCKET gcsFolderName=$KCBQ_TEST_FOLDER -topics=$test_topics +topics=$TEST_TOPICS EOF CONNECT_DOCKER_IMAGE='kcbq/connect' CONNECT_DOCKER_NAME='kcbq_test_connect' -cp "$BASE_DIR"/../../kcbq-confluent/build/distributions/kcbq-confluent-*.tar "$DOCKER_DIR/connect/kcbq.tar" +cp "$BASE_DIR"/../target/components/packages/wepay-kafka-connect-bigquery-*.zip "$DOCKER_DIR/connect/kcbq.zip" if [[ "$KCBQ_TEST_KEYSOURCE" == "JSON" ]]; then echo "$KCBQ_TEST_KEYFILE" > "$DOCKER_DIR/connect/key.json" else @@ -268,7 +268,7 @@ fi docker create --name "$CONNECT_DOCKER_NAME" \ --link "$KAFKA_DOCKER_NAME:kafka" --link "$SCHEMA_REGISTRY_DOCKER_NAME:schema-registry" \ -t "$CONNECT_DOCKER_IMAGE" /bin/bash -docker cp "$DOCKER_DIR/connect/kcbq.tar" "$CONNECT_DOCKER_NAME:/usr/local/share/kafka/plugins/kafka-connect-bigquery/kcbq.tar" +docker cp "$DOCKER_DIR/connect/kcbq.zip" "$CONNECT_DOCKER_NAME:/usr/local/share/kafka/plugins/kafka-connect-bigquery/kcbq.zip" docker cp "$DOCKER_DIR/connect/properties/" "$CONNECT_DOCKER_NAME:/etc/kafka-connect-bigquery/" docker cp "$DOCKER_DIR/connect/key.json" "$CONNECT_DOCKER_NAME:/tmp/key.json" docker start -a "$CONNECT_DOCKER_NAME" @@ -277,10 +277,10 @@ docker start -a "$CONNECT_DOCKER_NAME" # Checking on BigQuery data via Java test (this is the verification portion of the actual test) statusupdate 'Verifying that test data made it successfully to BigQuery' -INTEGRATION_TEST_RESOURCE_DIR="$BASE_DIR/../src/integration-test/resources" -[[ ! -d "$INTEGRATION_TEST_RESOURCE_DIR" ]] && mkdir -p "$INTEGRATION_TEST_RESOURCE_DIR" +TEST_RESOURCE_DIR="$BASE_DIR/../src/test/resources" +[[ ! -d "$TEST_RESOURCE_DIR" ]] && mkdir -p "$TEST_RESOURCE_DIR" -cat << EOF > "$INTEGRATION_TEST_RESOURCE_DIR/test.properties" +cat << EOF > "$TEST_RESOURCE_DIR/test.properties" keyfile=$KCBQ_TEST_KEYFILE project=$KCBQ_TEST_PROJECT dataset=$KCBQ_TEST_DATASET @@ -289,5 +289,4 @@ folder=$KCBQ_TEST_FOLDER keysource=$KCBQ_TEST_KEYSOURCE EOF - -"$GRADLEW" -p "$BASE_DIR/.." cleanIntegrationTest integrationTest +mvn -f "$BASE_DIR/.." clean test-compile -Dskip.unit.tests=true failsafe:integration-test@verify-docker-test \ No newline at end of file diff --git a/kcbq-connector/test/resources/connector-template.properties b/kcbq-connector/test/resources/connector-template.properties index 20897982c..f40565045 100644 --- a/kcbq-connector/test/resources/connector-template.properties +++ b/kcbq-connector/test/resources/connector-template.properties @@ -1,4 +1,7 @@ -# Copyright 2016 WePay, Inc. +# +# Copyright 2020 Confluent, Inc. +# +# This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc. 
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,6 +15,7 @@
 # KIND, either express or implied. See the License for the
 # specific language governing permissions and limitations
 # under the License.
+#
 
 name=bigquery-connector
 connector.class=com.wepay.kafka.connect.bigquery.BigQuerySinkConnector
diff --git a/kcbq-connector/test/resources/standalone-template.properties b/kcbq-connector/test/resources/standalone-template.properties
index cb9998503..bd019636f 100644
--- a/kcbq-connector/test/resources/standalone-template.properties
+++ b/kcbq-connector/test/resources/standalone-template.properties
@@ -1,4 +1,7 @@
-# Copyright 2016 WePay, Inc.
+#
+# Copyright 2020 Confluent, Inc.
+#
+# This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,6 +15,7 @@
 # KIND, either express or implied. See the License for the
 # specific language governing permissions and limitations
 # under the License.
+#
 
 bootstrap.servers=kafka:29092
 key.converter=io.confluent.connect.avro.AvroConverter
diff --git a/pom.xml b/pom.xml
new file mode 100644
index 000000000..d49d417ed
--- /dev/null
+++ b/pom.xml
@@ -0,0 +1,415 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+    Copyright 2020 Confluent, Inc.
+
+    This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc.
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing,
+    software distributed under the License is distributed on an
+    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+    KIND, either express or implied. See the License for the
+    specific language governing permissions and limitations
+    under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+
+    <groupId>com.wepay.kcbq</groupId>
+    <artifactId>kcbq-parent</artifactId>
+    <version>1.6.11-SNAPSHOT</version>
+    <packaging>pom</packaging>
+
+    <modules>
+        <module>kcbq-api</module>
+        <module>kcbq-confluent</module>
+        <module>kcbq-connector</module>
+    </modules>
+
+    <properties>
+        <java.version>8</java.version>
+
+        <avro.version>1.9.2</avro.version>
+        <confluent.version>5.5.0</confluent.version>
+        <debezium.version>0.6.2</debezium.version>
+        <errorprone.version>2.3.2</errorprone.version>
+        <google.auth.version>0.16.1</google.auth.version>
+        <google.cloud.version>1.79.0</google.cloud.version>
+        <google.protobuf.version>3.19.6</google.protobuf.version>
+        <gson.version>2.8.5</gson.version>
+        <jackson.version>2.10.2</jackson.version>
+        <kafka.version>2.5.0</kafka.version>
+        <slf4j.version>1.7.26</slf4j.version>
+
+        <junit.version>4.13</junit.version>
+        <mockito.version>3.2.4</mockito.version>
+
+        <checkstyle.plugin.version>2.15</checkstyle.plugin.version>
+        <checkstyle.version>6.18</checkstyle.version>
+        <compiler.plugin.version>3.8.1</compiler.plugin.version>
+        <jacoco.plugin.version>0.8.5</jacoco.plugin.version>
+        <kafka.connect.plugin.version>0.11.1</kafka.connect.plugin.version>
+        <release.plugin.version>2.5.3</release.plugin.version>
+        <site.plugin.version>3.7.1</site.plugin.version>
+        <surefire.plugin.version>3.0.0-M4</surefire.plugin.version>
+
+        <main.dir>${project.basedir}</main.dir>
+        <skip.unit.tests>${maven.test.skip}</skip.unit.tests>
+    </properties>
+
+    <name>kafka-connect-bigquery-parent</name>
+
+    <url>https://github.com/confluentinc/kafka-connect-bigquery</url>
+
+    <inceptionYear>2016</inceptionYear>
+
+    <licenses>
+        <license>
+            <name>Apache License 2.0</name>
+            <url>https://www.apache.org/licenses/LICENSE-2.0</url>
+            <distribution>repo</distribution>
+        </license>
+    </licenses>
+
+    <scm>
+        <connection>scm:git:git://github.com/confluentinc/kafka-connect-bigquery.git</connection>
+        <developerConnection>scm:git:git@github.com:confluentinc/kafka-connect-bigquery.git</developerConnection>
+        <url>https://github.com/confluentinc/kafka-connect-bigquery</url>
+        <tag>HEAD</tag>
+    </scm>
+
+    <developers>
+        <developer>
+            <id>C0urante</id>
+            <name>Chris Egerton</name>
+            <email>fearthecellos@gmail.com</email>
+            <timezone>America/New_York</timezone>
+        </developer>
+        <developer>
+            <id>moirat</id>
+            <name>Moira Tagle</name>
+            <email>moirat@wepay.com</email>
+            <timezone>America/Los_Angeles</timezone>
+        </developer>
+    </developers>
+
+    <repositories>
+        <repository>
+            <id>confluent</id>
+            <url>https://packages.confluent.io/maven/</url>
+        </repository>
+        <repository>
+            <id>jcenter</id>
+            <url>https://jcenter.bintray.com</url>
+        </repository>
+    </repositories>
+
+    <pluginRepositories>
+        <pluginRepository>
+            <id>confluent</id>
+            <url>https://packages.confluent.io/maven/</url>
+        </pluginRepository>
+        <pluginRepository>
+            <id>jcenter</id>
+            <url>https://jcenter.bintray.com</url>
+        </pluginRepository>
+    </pluginRepositories>
+
+    <dependencyManagement>
+        <dependencies>
+            <dependency>
+                <groupId>com.wepay.kcbq</groupId>
+                <artifactId>kcbq-api</artifactId>
+                <version>${project.version}</version>
+            </dependency>
+            <dependency>
+                <groupId>com.wepay.kcbq</groupId>
+                <artifactId>kcbq-confluent</artifactId>
+                <version>${project.version}</version>
+            </dependency>
+
+            <dependency>
+                <groupId>org.apache.kafka</groupId>
+                <artifactId>connect-api</artifactId>
+                <version>${kafka.version}</version>
+                <scope>provided</scope>
+            </dependency>
+            <dependency>
+                <groupId>org.apache.kafka</groupId>
+                <artifactId>kafka-clients</artifactId>
+                <version>${kafka.version}</version>
+                <scope>provided</scope>
+            </dependency>
+
+            <dependency>
+                <groupId>com.google.cloud</groupId>
+                <artifactId>google-cloud-bigquery</artifactId>
+                <version>${google.cloud.version}</version>
+            </dependency>
+            <dependency>
+                <groupId>com.google.cloud</groupId>
+                <artifactId>google-cloud-storage</artifactId>
+                <version>${google.cloud.version}</version>
+            </dependency>
+            <dependency>
+                <groupId>com.google.auth</groupId>
+                <artifactId>google-auth-library-oauth2-http</artifactId>
+                <version>${google.auth.version}</version>
+            </dependency>
+            <dependency>
+                <groupId>org.slf4j</groupId>
+                <artifactId>slf4j-api</artifactId>
+                <version>${slf4j.version}</version>
+            </dependency>
+            <dependency>
+                <groupId>io.debezium</groupId>
+                <artifactId>debezium-core</artifactId>
+                <version>${debezium.version}</version>
+            </dependency>
+            <dependency>
+                <groupId>org.apache.avro</groupId>
+                <artifactId>avro</artifactId>
+                <version>${avro.version}</version>
+            </dependency>
+            <dependency>
+                <groupId>io.confluent</groupId>
+                <artifactId>kafka-connect-avro-converter</artifactId>
+                <version>${confluent.version}</version>
+            </dependency>
+            <dependency>
+                <groupId>io.confluent</groupId>
+                <artifactId>kafka-schema-registry-client</artifactId>
+                <version>${confluent.version}</version>
+            </dependency>
+
+            <dependency>
+                <groupId>com.google.errorprone</groupId>
+                <artifactId>error_prone_annotations</artifactId>
+                <version>${errorprone.version}</version>
+            </dependency>
+            <dependency>
+                <groupId>com.google.code.gson</groupId>
+                <artifactId>gson</artifactId>
+                <version>${gson.version}</version>
+            </dependency>
+            <dependency>
+                <groupId>com.fasterxml.jackson.core</groupId>
+                <artifactId>jackson-core</artifactId>
+                <version>${jackson.version}</version>
+            </dependency>
+            <dependency>
+                <groupId>com.google.protobuf</groupId>
+                <artifactId>protobuf-java</artifactId>
+                <version>${google.protobuf.version}</version>
+            </dependency>
+
+            <dependency>
+                <groupId>junit</groupId>
+                <artifactId>junit</artifactId>
+                <version>${junit.version}</version>
+                <scope>test</scope>
+            </dependency>
+            <dependency>
+                <groupId>org.mockito</groupId>
+                <artifactId>mockito-core</artifactId>
+                <version>${mockito.version}</version>
+                <scope>test</scope>
+            </dependency>
+            <dependency>
+                <groupId>org.slf4j</groupId>
+                <artifactId>slf4j-log4j12</artifactId>
+                <version>${slf4j.version}</version>
+                <scope>test</scope>
+            </dependency>
+        </dependencies>
+    </dependencyManagement>
+
+    <build>
+        <pluginManagement>
+            <plugins>
+                <plugin>
+                    <groupId>org.apache.maven.plugins</groupId>
+                    <artifactId>maven-release-plugin</artifactId>
+                    <version>${release.plugin.version}</version>
+                    <configuration>
+                        <autoVersionSubmodules>true</autoVersionSubmodules>
+                        <pushChanges>false</pushChanges>
+                        <tagNameFormat>v@{project.version}</tagNameFormat>
+                    </configuration>
+                </plugin>
+                <plugin>
+                    <groupId>com.mycila</groupId>
+                    <artifactId>license-maven-plugin</artifactId>
+                    <version>3.0</version>
+                    <configuration>
+                        <inlineHeader>
+Copyright 2020 Confluent, Inc.
+
+This software contains code derived from the WePay BigQuery Kafka Connector, Copyright WePay, Inc.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+                        </inlineHeader>
+                        <headerDefinitions>
+                            <headerDefinition>${main.dir}/config/copyright/custom-header-styles.xml</headerDefinition>
+                        </headerDefinitions>
+                        <mapping>
+                            <java>CUSTOM_JAVA_STYLE</java>
+                            <Jenkinsfile>JENKINSFILE_STYLE</Jenkinsfile>
+                        </mapping>
+                        <excludes>
+                            <exclude>LICENSE.md</exclude>
+                            <exclude>*.log</exclude>
+                            <exclude>config/checkstyle/google_checks.xml</exclude>
+                        </excludes>
+                        <includes>
+                            <include>.ci/*</include>
+                        </includes>
+                    </configuration>
+                </plugin>
+            </plugins>
+        </pluginManagement>
+
+        <plugins>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-compiler-plugin</artifactId>
+                <version>${compiler.plugin.version}</version>
+                <configuration>
+                    <source>${java.version}</source>
+                    <target>${java.version}</target>
+                </configuration>
+            </plugin>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-surefire-plugin</artifactId>
+                <version>${surefire.plugin.version}</version>
+                <configuration>
+                    <excludes>
+                        <exclude>**/*IntegrationTest.java</exclude>
+                    </excludes>
+                    <skipTests>${skip.unit.tests}</skipTests>
+                </configuration>
+            </plugin>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-failsafe-plugin</artifactId>
+                <version>${surefire.plugin.version}</version>
+                <executions>
+                    <execution>
+                        <id>verify-docker-test</id>
+                        <goals>
+                            <goal>integration-test</goal>
+                        </goals>
+                        <configuration>
+                            <includes>
+                                <include>**/*IntegrationTest.java</include>
+                            </includes>
+                        </configuration>
+                    </execution>
+                </executions>
+            </plugin>
+            <plugin>
+                <groupId>org.jacoco</groupId>
+                <artifactId>jacoco-maven-plugin</artifactId>
+                <version>${jacoco.plugin.version}</version>
+                <executions>
+                    <execution>
+                        <id>pre-unit-test</id>
+                        <goals>
+                            <goal>prepare-agent</goal>
+                        </goals>
+                    </execution>
+                    <execution>
+                        <id>report</id>
+                        <phase>verify</phase>
+                        <goals>
+                            <goal>report</goal>
+                        </goals>
+                    </execution>
+                </executions>
+            </plugin>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-checkstyle-plugin</artifactId>
+                <version>${checkstyle.plugin.version}</version>
+                <executions>
+                    <execution>
+                        <id>validate</id>
+                        <phase>validate</phase>
+                        <configuration>
+                            <configLocation>${project.parent.basedir}/config/checkstyle/google_checks.xml</configLocation>
+                            <suppressionsLocation>${project.parent.basedir}/config/checkstyle/suppressions.xml</suppressionsLocation>
+                        </configuration>
+                        <goals>
+                            <goal>check</goal>
+                        </goals>
+                    </execution>
+                </executions>
+                <dependencies>
+                    <dependency>
+                        <groupId>com.puppycrawl.tools</groupId>
+                        <artifactId>checkstyle</artifactId>
+                        <version>${checkstyle.version}</version>
+                    </dependency>
+                </dependencies>
+            </plugin>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-site-plugin</artifactId>
+                <version>${site.plugin.version}</version>
+            </plugin>
+            <plugin>
+                <groupId>io.confluent</groupId>
+                <artifactId>kafka-connect-maven-plugin</artifactId>
+                <version>${kafka.connect.plugin.version}</version>
+            </plugin>
+        </plugins>
+    </build>
+
+    <profiles>
+        <profile>
+            <id>jenkins</id>
+            <build>
+                <plugins>
+                    <plugin>
+                        <groupId>org.apache.maven.plugins</groupId>
+                        <artifactId>maven-checkstyle-plugin</artifactId>
+                        <configuration>
+                            <skip>true</skip>
+                        </configuration>
+                    </plugin>
+                </plugins>
+            </build>
+        </profile>
+    </profiles>
+</project>
diff --git a/settings.gradle b/settings.gradle
deleted file mode 100644
index a24368b32..000000000
--- a/settings.gradle
+++ /dev/null
@@ -1 +0,0 @@
-include 'kcbq-connector', 'kcbq-api', 'kcbq-confluent'