Skip to content

Commit 1dd9ad5

Browse files
authored
Merge pull request #32 from embulk/flatten-json-array
Implement flattening JSON Arrays
2 parents 8eba18b + 1872ce6 commit 1dd9ad5

File tree

4 files changed

+268
-6
lines changed

4 files changed

+268
-6
lines changed
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
/*
2+
* Copyright 2023 The Embulk project
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package org.embulk.util.json;
18+
19+
import com.fasterxml.jackson.core.filter.TokenFilter;
20+
21+
/**
22+
* Simple {@link TokenFilter} implementation to flatten top-level JSON Array(s).
23+
*/
24+
class FlattenJsonArrayFilter extends TokenFilter {
25+
FlattenJsonArrayFilter(final int depth) {
26+
if (depth <= 0) {
27+
throw new IllegalArgumentException("FlattenJsonArrayFilter must receive at least 1 as depth.");
28+
}
29+
this.depth = depth;
30+
}
31+
32+
@Override
33+
public TokenFilter includeElement(final int index) {
34+
if (this.depth <= 1) {
35+
return TokenFilter.INCLUDE_ALL;
36+
}
37+
return new FlattenJsonArrayFilter(this.depth - 1);
38+
}
39+
40+
@Override
41+
public TokenFilter includeProperty(final String name) {
42+
return null;
43+
}
44+
45+
@Override
46+
public String toString() {
47+
return "[FlattenJsonArrayFilter depth: " + this.depth + "]";
48+
}
49+
50+
private final int depth;
51+
}

src/main/java/org/embulk/util/json/JsonValueParser.java

Lines changed: 32 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -34,13 +34,15 @@
3434
public final class JsonValueParser implements Closeable {
3535
private JsonValueParser(
3636
final com.fasterxml.jackson.core.JsonParser jacksonParser,
37+
final int depthToFlattenJsonArrays,
3738
final boolean hasLiteralsWithNumbers,
3839
final boolean hasFallbacksForUnparsableNumbers,
3940
final double defaultDouble,
4041
final long defaultLong) {
4142
this.jacksonParser = Objects.requireNonNull(jacksonParser);
4243
this.valueReader = new InternalJsonValueReader(
4344
hasLiteralsWithNumbers, hasFallbacksForUnparsableNumbers, defaultDouble, defaultLong);
45+
this.depthToFlattenJsonArrays = depthToFlattenJsonArrays;
4446
this.hasLiteralsWithNumbers = hasLiteralsWithNumbers;
4547
this.hasFallbacksForUnparsableNumbers = hasFallbacksForUnparsableNumbers;
4648
this.defaultDouble = defaultDouble;
@@ -54,6 +56,7 @@ public static final class Builder {
5456
Builder(final JsonFactory factory) {
5557
this.factory = Objects.requireNonNull(factory);
5658
this.root = null;
59+
this.depthToFlattenJsonArrays = 0;
5760
this.hasLiteralsWithNumbers = false;
5861
this.hasFallbacksForUnparsableNumbers = false;
5962
this.defaultDouble = 0.0;
@@ -84,6 +87,17 @@ public Builder root(final String root) {
8487
return this;
8588
}
8689

90+
/**
91+
* Sets the depth to flatten JSON Arrays to parse.
92+
*
93+
* @param depthToFlattenJsonArrays the depth to flatten JSON Arrays
94+
* @return this builder
95+
*/
96+
public Builder setDepthToFlattenJsonArrays(final int depthToFlattenJsonArrays) {
97+
this.depthToFlattenJsonArrays = depthToFlattenJsonArrays;
98+
return this;
99+
}
100+
87101
/**
88102
* Enables creating {@link JsonDouble} and {@link JsonLong} instances with supplemental literal strings.
89103
*
@@ -126,6 +140,7 @@ public Builder fallbackForUnparsableNumbers(final double defaultDouble, final lo
126140
public JsonValueParser build(final String json) throws IOException {
127141
return new JsonValueParser(
128142
buildJacksonParser(json),
143+
this.depthToFlattenJsonArrays,
129144
this.hasLiteralsWithNumbers,
130145
this.hasFallbacksForUnparsableNumbers,
131146
this.defaultDouble,
@@ -141,6 +156,7 @@ public JsonValueParser build(final String json) throws IOException {
141156
public JsonValueParser build(final InputStream jsonStream) throws IOException {
142157
return new JsonValueParser(
143158
buildJacksonParser(jsonStream),
159+
this.depthToFlattenJsonArrays,
144160
this.hasLiteralsWithNumbers,
145161
this.hasFallbacksForUnparsableNumbers,
146162
this.defaultDouble,
@@ -156,20 +172,30 @@ private com.fasterxml.jackson.core.JsonParser buildJacksonParser(final InputStre
156172
}
157173

158174
private com.fasterxml.jackson.core.JsonParser extendJacksonParser(final com.fasterxml.jackson.core.JsonParser baseParser) {
159-
if (this.root == null) {
160-
return baseParser;
161-
}
162-
return new FilteringParserDelegate(
163-
baseParser,
175+
com.fasterxml.jackson.core.JsonParser parser = baseParser;
176+
if (this.root != null) {
177+
parser = new FilteringParserDelegate(
178+
parser,
164179
new JsonPointerBasedFilter(this.root),
165180
false, // TODO: Use com.fasterxml.jackson.core.filter.TokenFilter.Inclusion since Jackson 2.12.
166181
true // Allow multiple matches
167182
);
183+
}
184+
if (this.depthToFlattenJsonArrays > 0) {
185+
parser = new FilteringParserDelegate(
186+
parser,
187+
new FlattenJsonArrayFilter(this.depthToFlattenJsonArrays),
188+
false, // TODO: Use com.fasterxml.jackson.core.filter.TokenFilter.Inclusion since Jackson 2.12.
189+
true // Allow multiple matches
190+
);
191+
}
192+
return parser;
168193
}
169194

170195
private final JsonFactory factory;
171196

172197
private JsonPointer root;
198+
private int depthToFlattenJsonArrays;
173199
private boolean hasLiteralsWithNumbers;
174200
private boolean hasFallbacksForUnparsableNumbers;
175201
private double defaultDouble;
@@ -243,6 +269,7 @@ public final void close() throws IOException {
243269
private final com.fasterxml.jackson.core.JsonParser jacksonParser;
244270
private final InternalJsonValueReader valueReader;
245271

272+
private final int depthToFlattenJsonArrays;
246273
private final boolean hasLiteralsWithNumbers;
247274
private final boolean hasFallbacksForUnparsableNumbers;
248275
private final double defaultDouble;
Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
/*
2+
* Copyright 2023 The Embulk project
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package org.embulk.util.json;
18+
19+
import static org.junit.jupiter.api.Assertions.assertEquals;
20+
import static org.junit.jupiter.api.Assertions.assertNull;
21+
import static org.junit.jupiter.api.Assertions.assertThrows;
22+
23+
import com.fasterxml.jackson.core.JsonFactory;
24+
import com.fasterxml.jackson.core.JsonToken;
25+
import com.fasterxml.jackson.core.filter.FilteringParserDelegate;
26+
import java.io.IOException;
27+
import org.junit.jupiter.api.Test;
28+
29+
public class TestFlattenJsonArrayFilter {
30+
@Test
31+
public void testSimple() throws IOException {
32+
final com.fasterxml.jackson.core.JsonParser parser = createFilteredParser("[{\"foo\":\"bar\"}]", 1);
33+
assertEquals(JsonToken.START_OBJECT, parser.nextToken());
34+
assertEquals(JsonToken.FIELD_NAME, parser.nextToken());
35+
assertEquals("foo", parser.getValueAsString());
36+
assertEquals(JsonToken.VALUE_STRING, parser.nextToken());
37+
assertEquals("bar", parser.getValueAsString());
38+
assertEquals(JsonToken.END_OBJECT, parser.nextToken());
39+
assertNull(parser.nextToken());
40+
}
41+
42+
@Test
43+
public void testMultiple() throws IOException {
44+
final com.fasterxml.jackson.core.JsonParser parser = createFilteredParser("[{\"foo\":\"bar\"},{\"foo\":\"baz\"}]", 1);
45+
assertEquals(JsonToken.START_OBJECT, parser.nextToken());
46+
assertEquals(JsonToken.FIELD_NAME, parser.nextToken());
47+
assertEquals("foo", parser.getValueAsString());
48+
assertEquals(JsonToken.VALUE_STRING, parser.nextToken());
49+
assertEquals("bar", parser.getValueAsString());
50+
assertEquals(JsonToken.END_OBJECT, parser.nextToken());
51+
assertEquals(JsonToken.START_OBJECT, parser.nextToken());
52+
assertEquals(JsonToken.FIELD_NAME, parser.nextToken());
53+
assertEquals("foo", parser.getValueAsString());
54+
assertEquals(JsonToken.VALUE_STRING, parser.nextToken());
55+
assertEquals("baz", parser.getValueAsString());
56+
assertEquals(JsonToken.END_OBJECT, parser.nextToken());
57+
assertNull(parser.nextToken());
58+
}
59+
60+
@Test
61+
public void testNested() throws IOException {
62+
final com.fasterxml.jackson.core.JsonParser parser = createFilteredParser("[[{\"foo\":\"bar\"}]]", 1);
63+
assertEquals(JsonToken.START_ARRAY, parser.nextToken());
64+
assertEquals(JsonToken.START_OBJECT, parser.nextToken());
65+
assertEquals(JsonToken.FIELD_NAME, parser.nextToken());
66+
assertEquals("foo", parser.getValueAsString());
67+
assertEquals(JsonToken.VALUE_STRING, parser.nextToken());
68+
assertEquals("bar", parser.getValueAsString());
69+
assertEquals(JsonToken.END_OBJECT, parser.nextToken());
70+
assertEquals(JsonToken.END_ARRAY, parser.nextToken());
71+
assertNull(parser.nextToken());
72+
}
73+
74+
@Test
75+
public void testNested2() throws IOException {
76+
final com.fasterxml.jackson.core.JsonParser parser = createFilteredParser("[[{\"foo\":\"bar\"}]]", 2);
77+
assertEquals(JsonToken.START_OBJECT, parser.nextToken());
78+
assertEquals(JsonToken.FIELD_NAME, parser.nextToken());
79+
assertEquals("foo", parser.getValueAsString());
80+
assertEquals(JsonToken.VALUE_STRING, parser.nextToken());
81+
assertEquals("bar", parser.getValueAsString());
82+
assertEquals(JsonToken.END_OBJECT, parser.nextToken());
83+
assertNull(parser.nextToken());
84+
}
85+
86+
@Test
87+
public void testNoArray() throws IOException {
88+
final com.fasterxml.jackson.core.JsonParser parser = createFilteredParser("{\"foo\":\"bar\"}", 1);
89+
assertNull(parser.nextToken());
90+
}
91+
92+
@Test
93+
public void test0() throws IOException {
94+
assertThrows(IllegalArgumentException.class, () -> {
95+
new FlattenJsonArrayFilter(0);
96+
});
97+
}
98+
99+
private static com.fasterxml.jackson.core.JsonParser createFilteredParser(
100+
final String json,
101+
final int depth) throws IOException {
102+
final JsonFactory factory = new JsonFactory();
103+
factory.enable(com.fasterxml.jackson.core.JsonParser.Feature.ALLOW_UNQUOTED_CONTROL_CHARS);
104+
factory.enable(com.fasterxml.jackson.core.JsonParser.Feature.ALLOW_NON_NUMERIC_NUMBERS);
105+
return new FilteringParserDelegate(
106+
factory.createParser(json),
107+
new FlattenJsonArrayFilter(depth),
108+
false, // TODO: Use com.fasterxml.jackson.core.filter.TokenFilter.Inclusion since Jackson 2.12.
109+
true // Allow multiple matches
110+
);
111+
}
112+
}

src/test/java/org/embulk/util/json/TestJsonValueParser.java

Lines changed: 73 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,27 @@ public void testParseMultipleJsonsWithPointer() throws Exception {
134134
assertNull(parser.readJsonValue());
135135
}
136136

137+
@Test
138+
public void testFlattenJsonArray() throws Exception {
139+
final JsonValueParser parser = JsonValueParser.builder()
140+
.setDepthToFlattenJsonArrays(1)
141+
.build("[{\"a\": {\"b\": 1}},{\"a\": {\"b\": 2}}]");
142+
assertEquals(JsonObject.of("a", JsonObject.of("b", JsonLong.of(1))), parser.readJsonValue());
143+
assertEquals(JsonObject.of("a", JsonObject.of("b", JsonLong.of(2))), parser.readJsonValue());
144+
assertNull(parser.readJsonValue());
145+
}
146+
147+
@Test
148+
public void testRootWithFlattenJsonArray() throws Exception {
149+
final JsonValueParser parser = JsonValueParser.builder()
150+
.root("/f")
151+
.setDepthToFlattenJsonArrays(1)
152+
.build("{\"f\":[{\"a\": {\"b\": 1}},{\"a\": {\"b\": 2}}]}");
153+
assertEquals(JsonObject.of("a", JsonObject.of("b", JsonLong.of(1))), parser.readJsonValue());
154+
assertEquals(JsonObject.of("a", JsonObject.of("b", JsonLong.of(2))), parser.readJsonValue());
155+
assertNull(parser.readJsonValue());
156+
}
157+
137158
@Test
138159
public void testCaptureJsonPointers() throws Exception {
139160
final JsonValueParser parser = JsonValueParser.builder().build(
@@ -202,9 +223,29 @@ public void testCaptureMixed() throws Exception {
202223
}
203224

204225
@Test
205-
public void testCaptureRoot() throws Exception {
226+
public void testCaptureRootPointer() throws Exception {
206227
final JsonValueParser parser = JsonValueParser.builder().build(
207228
"{\"foo\":12,\"bar\":[true,false],\"baz\":null,\"qux\":{\"hoge\":\"fuga\"}}");
229+
final CapturingPointers pointers = CapturingPointers.builder().build(); // No pointers -- root.
230+
final JsonValue[] values = parser.captureJsonValues(pointers);
231+
assertEquals(1, values.length);
232+
assertEquals(
233+
JsonObject.of(
234+
"foo", JsonLong.of(12L),
235+
"bar", JsonArray.of(JsonBoolean.TRUE, JsonBoolean.FALSE),
236+
"baz", JsonNull.NULL,
237+
"qux", JsonObject.of("hoge", JsonString.of("fuga"))),
238+
values[0]);
239+
240+
// Confirming that JsonValueParser reaches at the end as expected.
241+
242+
assertNull(parser.captureJsonValues(pointers));
243+
}
244+
245+
@Test
246+
public void testCaptureWithRoot() throws Exception {
247+
final JsonValueParser parser = JsonValueParser.builder().root("/ex").build(
248+
"{\"ex\":{\"foo\":12,\"bar\":[true,false],\"baz\":null,\"qux\":{\"hoge\":\"fuga\"}}}");
208249
final CapturingPointers pointers = CapturingPointers.builder().build();
209250
final JsonValue[] values = parser.captureJsonValues(pointers);
210251
assertEquals(1, values.length);
@@ -220,4 +261,35 @@ public void testCaptureRoot() throws Exception {
220261

221262
assertNull(parser.captureJsonValues(pointers));
222263
}
264+
265+
@Test
266+
public void testCaptureWithFlattenJsonArray() throws Exception {
267+
final JsonValueParser parser = JsonValueParser.builder().setDepthToFlattenJsonArrays(1).build(
268+
"[{\"foo\":12,\"bar\":[true,false],\"baz\":null,\"qux\":{\"hoge\":\"fuga\"}},{\"foo\":14,\"bar\":[false],\"baz\":null,\"qux\":{}}]");
269+
final CapturingPointers pointers = CapturingPointers.builder().build();
270+
271+
final JsonValue[] values1 = parser.captureJsonValues(pointers);
272+
assertEquals(1, values1.length);
273+
assertEquals(
274+
JsonObject.of(
275+
"foo", JsonLong.of(12L),
276+
"bar", JsonArray.of(JsonBoolean.TRUE, JsonBoolean.FALSE),
277+
"baz", JsonNull.NULL,
278+
"qux", JsonObject.of("hoge", JsonString.of("fuga"))),
279+
values1[0]);
280+
281+
final JsonValue[] values2 = parser.captureJsonValues(pointers);
282+
assertEquals(1, values2.length);
283+
assertEquals(
284+
JsonObject.of(
285+
"foo", JsonLong.of(14L),
286+
"bar", JsonArray.of(JsonBoolean.FALSE),
287+
"baz", JsonNull.NULL,
288+
"qux", JsonObject.of()),
289+
values2[0]);
290+
291+
// Confirming that JsonValueParser reaches at the end as expected.
292+
293+
assertNull(parser.captureJsonValues(pointers));
294+
}
223295
}

0 commit comments

Comments
 (0)