Skip to content

Commit 0fc91d8

Browse files
committed
Fork DSBulk Text Codec
1 parent 9392aae commit 0fc91d8

File tree

134 files changed

+11566
-1
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

134 files changed

+11566
-1
lines changed

common/pom.xml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,10 @@
3232
<artifactId>dsbulk-codecs-api</artifactId>
3333
</dependency>
3434
<dependency>
35+
<!-- temporarily forked in this repository -->
3536
<groupId>com.datastax.oss</groupId>
3637
<artifactId>dsbulk-codecs-text</artifactId>
38+
<version>1.11.0-vectors-preview</version>
3739
</dependency>
3840
<dependency>
3941
<groupId>com.datastax.oss</groupId>

pom.xml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
<url>http://www.datastax.com</url>
3232
</organization>
3333
<modules>
34+
<module>text</module>
3435
<module>common</module>
3536
</modules>
3637
<properties>
@@ -41,7 +42,7 @@
4142
<kafka.connect.version>2.4.0</kafka.connect.version>
4243
<caffeine.version>2.6.2</caffeine.version>
4344
<oss.driver.version>4.16.0</oss.driver.version>
44-
<dsbulk.version>1.11.0-SNAPSHOT</dsbulk.version>
45+
<dsbulk.version>1.10.0</dsbulk.version>
4546
<reactive-streams.version>1.0.3</reactive-streams.version>
4647
<guava.version>25.1-jre</guava.version>
4748
<slf4j.version>1.7.25</slf4j.version>

text/README.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# DataStax Bulk Loader Codecs - Text
2+
3+
This module contains implementations of the ConvertingCodec API for Strings and Json.
4+
JSON conversion is done using the FasterXML Jackson library.

text/pom.xml

Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<!--
3+
4+
Copyright DataStax, Inc.
5+
6+
Licensed under the Apache License, Version 2.0 (the "License");
7+
you may not use this file except in compliance with the License.
8+
You may obtain a copy of the License at
9+
10+
http://www.apache.org/licenses/LICENSE-2.0
11+
12+
Unless required by applicable law or agreed to in writing, software
13+
distributed under the License is distributed on an "AS IS" BASIS,
14+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
See the License for the specific language governing permissions and
16+
limitations under the License.
17+
18+
-->
19+
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
20+
<modelVersion>4.0.0</modelVersion>
21+
<parent>
22+
<artifactId>dsbulk-codecs</artifactId>
23+
<groupId>com.datastax.oss</groupId>
24+
<version>1.10.0</version>
25+
<relativePath/>
26+
</parent>
27+
<version>1.11.0-vectors-preview</version>
28+
<artifactId>dsbulk-codecs-text</artifactId>
29+
<name>DataStax Bulk Loader - Codecs - Text - Vectors preview</name>
30+
<description>Text codecs for the DataStax Bulk Loader (String and JSON).</description>
31+
<dependencyManagement>
32+
<dependencies>
33+
<dependency>
34+
<groupId>com.datastax.oss</groupId>
35+
<artifactId>dsbulk-bom</artifactId>
36+
<version>1.10.0</version>
37+
<type>pom</type>
38+
<scope>import</scope>
39+
</dependency>
40+
</dependencies>
41+
</dependencyManagement>
42+
<dependencies>
43+
<dependency>
44+
<groupId>com.datastax.oss</groupId>
45+
<artifactId>java-driver-core</artifactId>
46+
<version>4.16.0</version>
47+
</dependency>
48+
<dependency>
49+
<groupId>com.datastax.oss</groupId>
50+
<artifactId>dsbulk-codecs-api</artifactId>
51+
</dependency>
52+
<dependency>
53+
<groupId>com.datastax.oss</groupId>
54+
<artifactId>java-driver-shaded-guava</artifactId>
55+
</dependency>
56+
<dependency>
57+
<groupId>org.slf4j</groupId>
58+
<artifactId>slf4j-api</artifactId>
59+
</dependency>
60+
<dependency>
61+
<groupId>io.netty</groupId>
62+
<artifactId>netty-common</artifactId>
63+
</dependency>
64+
<dependency>
65+
<groupId>com.fasterxml.jackson.core</groupId>
66+
<artifactId>jackson-core</artifactId>
67+
</dependency>
68+
<dependency>
69+
<groupId>com.fasterxml.jackson.core</groupId>
70+
<artifactId>jackson-databind</artifactId>
71+
</dependency>
72+
<dependency>
73+
<groupId>com.datastax.oss</groupId>
74+
<artifactId>dsbulk-tests</artifactId>
75+
<scope>test</scope>
76+
</dependency>
77+
<dependency>
78+
<groupId>org.junit.jupiter</groupId>
79+
<artifactId>junit-jupiter-engine</artifactId>
80+
<scope>test</scope>
81+
</dependency>
82+
<dependency>
83+
<groupId>org.junit.jupiter</groupId>
84+
<artifactId>junit-jupiter-params</artifactId>
85+
<scope>test</scope>
86+
</dependency>
87+
<dependency>
88+
<groupId>org.assertj</groupId>
89+
<artifactId>assertj-core</artifactId>
90+
<scope>test</scope>
91+
</dependency>
92+
<dependency>
93+
<groupId>ch.qos.logback</groupId>
94+
<artifactId>logback-classic</artifactId>
95+
<scope>test</scope>
96+
</dependency>
97+
<dependency>
98+
<groupId>com.github.spotbugs</groupId>
99+
<artifactId>spotbugs-annotations</artifactId>
100+
<scope>provided</scope>
101+
</dependency>
102+
</dependencies>
103+
<build>
104+
<plugins>
105+
<plugin>
106+
<groupId>org.apache.maven.plugins</groupId>
107+
<artifactId>maven-compiler-plugin</artifactId>
108+
<version>3.11.0</version>
109+
<configuration>
110+
<compilerId>javac</compilerId>
111+
<forceJavacCompilerUse>true</forceJavacCompilerUse>
112+
<fork>true</fork>
113+
<useIncrementalCompilation>false</useIncrementalCompilation>
114+
<compilerArgs>
115+
<arg>-Werror</arg>
116+
</compilerArgs>
117+
<release>8</release>
118+
<source>8</source>
119+
<target>8</target>
120+
</configuration>
121+
</plugin>
122+
<plugin>
123+
<groupId>org.codehaus.mojo</groupId>
124+
<artifactId>animal-sniffer-maven-plugin</artifactId>
125+
<configuration>
126+
<skip>true</skip>
127+
</configuration>
128+
</plugin>
129+
</plugins>
130+
</build>
131+
</project>
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
/*
2+
* Copyright DataStax, Inc.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package com.datastax.oss.dsbulk.codecs.text;
17+
18+
import com.datastax.oss.dsbulk.codecs.api.CommonConversionContext;
19+
import com.datastax.oss.dsbulk.codecs.text.json.JsonCodecUtils;
20+
import com.fasterxml.jackson.databind.ObjectMapper;
21+
import edu.umd.cs.findbugs.annotations.NonNull;
22+
import java.util.Objects;
23+
24+
public class TextConversionContext extends CommonConversionContext {
25+
26+
public static final String OBJECT_MAPPER = "OBJECT_MAPPER";
27+
28+
public TextConversionContext() {
29+
addAttribute(OBJECT_MAPPER, JsonCodecUtils.getObjectMapper());
30+
}
31+
32+
public TextConversionContext setObjectMapper(@NonNull ObjectMapper objectMapper) {
33+
addAttribute(OBJECT_MAPPER, Objects.requireNonNull(objectMapper));
34+
return this;
35+
}
36+
}
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
/*
2+
* Copyright DataStax, Inc.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package com.datastax.oss.dsbulk.codecs.text.json;
17+
18+
import com.datastax.oss.driver.api.core.type.reflect.GenericType;
19+
import com.fasterxml.jackson.core.json.JsonReadFeature;
20+
import com.fasterxml.jackson.databind.DeserializationFeature;
21+
import com.fasterxml.jackson.databind.JsonNode;
22+
import com.fasterxml.jackson.databind.ObjectMapper;
23+
import com.fasterxml.jackson.databind.json.JsonMapper;
24+
import com.fasterxml.jackson.databind.node.JsonNodeFactory;
25+
import java.math.BigDecimal;
26+
27+
public class JsonCodecUtils {
28+
29+
public static final GenericType<JsonNode> JSON_NODE_TYPE = GenericType.of(JsonNode.class);
30+
31+
/**
32+
* A {@link JsonNodeFactory} that preserves {@link BigDecimal} scales, used to generate Json
33+
* nodes.
34+
*/
35+
public static final JsonNodeFactory JSON_NODE_FACTORY =
36+
JsonNodeFactory.withExactBigDecimals(true);
37+
38+
/**
39+
* The object mapper to use for converting Json nodes to and from Java types in Json codecs.
40+
*
41+
* <p>This is not the object mapper used by the Json connector to read and write Json files.
42+
*
43+
* @return The object mapper to use for converting Json nodes to and from Java types in Json
44+
* codecs.
45+
*/
46+
public static ObjectMapper getObjectMapper() {
47+
return JsonMapper.builder()
48+
.nodeFactory(JSON_NODE_FACTORY)
49+
// create a somewhat lenient mapper that recognizes a slightly relaxed Json syntax when
50+
// parsing
51+
.enable(JsonReadFeature.ALLOW_UNQUOTED_FIELD_NAMES)
52+
.enable(JsonReadFeature.ALLOW_MISSING_VALUES)
53+
.enable(JsonReadFeature.ALLOW_NON_NUMERIC_NUMBERS)
54+
.enable(JsonReadFeature.ALLOW_SINGLE_QUOTES)
55+
// fail on trailing tokens: the entire input must be parsed
56+
.enable(DeserializationFeature.FAIL_ON_TRAILING_TOKENS)
57+
.build();
58+
}
59+
}
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
/*
2+
* Copyright DataStax, Inc.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package com.datastax.oss.dsbulk.codecs.text.json;
17+
18+
import com.datastax.oss.driver.api.core.type.codec.TypeCodec;
19+
import com.datastax.oss.dsbulk.codecs.api.ConvertingCodec;
20+
import com.fasterxml.jackson.databind.JsonNode;
21+
import java.util.List;
22+
23+
public abstract class JsonNodeConvertingCodec<T> extends ConvertingCodec<JsonNode, T> {
24+
25+
private final List<String> nullStrings;
26+
27+
protected JsonNodeConvertingCodec(TypeCodec<T> targetCodec, List<String> nullStrings) {
28+
super(targetCodec, JsonNode.class);
29+
this.nullStrings = nullStrings;
30+
}
31+
32+
/**
33+
* Whether the input is null.
34+
*
35+
* <p>This method should be used to inspect external inputs that are meant to be converted <em>to
36+
* textual CQL types only (text, varchar and ascii)</em>.
37+
*
38+
* <p>It always considers the empty string as NOT equivalent to NULL, unless the user clearly
39+
* specifies that the empty string is to be considered as NULL, through the <code>
40+
* codec.nullStrings</code> setting.
41+
*
42+
* <p>Do NOT use this method for non-textual CQL types; use {@link #isNullOrEmpty(JsonNode)}
43+
* instead.
44+
*/
45+
protected boolean isNull(JsonNode node) {
46+
return node == null
47+
|| node.isNull()
48+
|| node.isMissingNode()
49+
|| (node.isValueNode() && nullStrings.contains(node.asText()));
50+
}
51+
52+
/**
53+
* Whether the input is null or empty.
54+
*
55+
* <p>This method should be used to inspect external inputs that are meant to be converted <em>to
56+
* non-textual CQL types only</em>.
57+
*
58+
* <p>It always considers the empty string as equivalent to NULL, which is in compliance with the
59+
* documentation of <code>codec.nullStrings</code>: "Note that, regardless of this setting, DSBulk
60+
* will always convert empty strings to `null` if the target CQL type is not textual (i.e. not
61+
* text, varchar or ascii)."
62+
*
63+
* <p>Do NOT use this method for textual CQL types; use {@link #isNull(JsonNode)} instead.
64+
*/
65+
protected boolean isNullOrEmpty(JsonNode node) {
66+
return isNull(node) || (node.isValueNode() && node.asText().isEmpty());
67+
}
68+
}

0 commit comments

Comments
 (0)