Skip to content

Commit 5c2ddf5

Browse files
authored
Unnest on nested json arrays in flat collections (#244)
1 parent 1e3c868 commit 5c2ddf5

File tree

3 files changed

+97
-21
lines changed

3 files changed

+97
-21
lines changed

document-store/src/integrationTest/java/org/hypertrace/core/documentstore/DocStoreQueryV1Test.java

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3981,6 +3981,71 @@ void testFlatVsNestedCollectionNestedFieldSelections(String dataStoreName) throw
39813981
assertDocsAndSizeEqual(
39823982
dataStoreName, flatBrandNoAliasIterator, "query/no_alias_response.json", 8);
39833983
}
3984+
3985+
/**
3986+
* Tests UNNEST operation on JSONB array fields in flat collections. This validates that
3987+
* jsonb_array_elements() is used for JSONB arrays (props.colors) instead of unnest() which is
3988+
* only for native arrays (tags).
3989+
*/
3990+
@ParameterizedTest
3991+
@ArgumentsSource(PostgresProvider.class)
3992+
void testFlatCollectionUnnestJsonbArray(String dataStoreName) throws IOException {
3993+
Datastore datastore = datastoreMap.get(dataStoreName);
3994+
Collection flatCollection =
3995+
datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT);
3996+
3997+
// Test UNNEST on JSONB array field: props.colors
3998+
// Expected: unnesting yields one result row per color element (rows are counted, not distinct items)
3999+
// Data: id=1 has ["Blue", "Green"], id=3 has ["Black"], id=5 has ["Orange", "Blue"]
4000+
// Total: 5 color entries from 3 items
4001+
Query unnestJsonbQuery =
4002+
Query.builder()
4003+
.addSelection(IdentifierExpression.of("item"))
4004+
.addSelection(JsonIdentifierExpression.of("props", "colors"))
4005+
.addFromClause(
4006+
UnnestExpression.of(JsonIdentifierExpression.of("props", "colors"), false))
4007+
.build();
4008+
4009+
Iterator<Document> resultIterator = flatCollection.aggregate(unnestJsonbQuery);
4010+
4011+
long count = 0;
4012+
while (resultIterator.hasNext()) {
4013+
resultIterator.next();
4014+
count++;
4015+
}
4016+
4017+
// Expecting 5 results: 2 from Soap (Blue, Green), 1 from Shampoo (Black),
4018+
// 2 from Lifebuoy (Orange, Blue)
4019+
assertEquals(5, count, "Should find 5 color entries after unnesting JSONB arrays");
4020+
}
4021+
4022+
@ParameterizedTest
4023+
@ArgumentsSource(PostgresProvider.class)
4024+
void testFlatCollectionArrayAnyOnJsonbArray(String dataStoreName) {
4025+
Datastore datastore = datastoreMap.get(dataStoreName);
4026+
Collection flatCollection =
4027+
datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT);
4028+
4029+
// Test ArrayRelationalFilterExpression.ANY on JSONB array (props.colors)
4030+
// This uses jsonb_array_elements() internally
4031+
Query jsonbArrayQuery =
4032+
Query.builder()
4033+
.addSelection(IdentifierExpression.of("item"))
4034+
.setFilter(
4035+
ArrayRelationalFilterExpression.builder()
4036+
.operator(ArrayOperator.ANY)
4037+
.filter(
4038+
RelationalExpression.of(
4039+
JsonIdentifierExpression.of("props", "colors"),
4040+
EQ,
4041+
ConstantExpression.of("Blue")))
4042+
.build())
4043+
.build();
4044+
4045+
long count = flatCollection.count(jsonbArrayQuery);
4046+
// ids 1 and 5 have "Blue" in their colors array
4047+
assertEquals(2, count, "Should find 2 items with 'Blue' color (ids 1, 5)");
4048+
}
39844049
}
39854050

39864051
@Nested

document-store/src/main/java/org/hypertrace/core/documentstore/postgres/query/v1/vistors/PostgresFilterTypeExpressionVisitor.java

Lines changed: 18 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
import org.hypertrace.core.documentstore.expression.impl.ArrayRelationalFilterExpression;
1919
import org.hypertrace.core.documentstore.expression.impl.ConstantExpression;
2020
import org.hypertrace.core.documentstore.expression.impl.DocumentArrayFilterExpression;
21+
import org.hypertrace.core.documentstore.expression.impl.JsonIdentifierExpression;
2122
import org.hypertrace.core.documentstore.expression.impl.KeyExpression;
2223
import org.hypertrace.core.documentstore.expression.impl.LogicalExpression;
2324
import org.hypertrace.core.documentstore.expression.impl.RelationalExpression;
@@ -169,22 +170,24 @@ private String getFilterStringForAnyOperator(final ArrayRelationalFilterExpressi
169170
boolean isFlatCollection =
170171
postgresQueryParser.getPgColTransformer().getDocumentType() == DocumentType.FLAT;
171172

173+
boolean isJsonbArray = expression.getArraySource() instanceof JsonIdentifierExpression;
174+
172175
// Extract the field name
173176
final String identifierName =
174177
expression
175178
.getArraySource()
176179
.accept(new PostgresIdentifierExpressionVisitor(postgresQueryParser));
177180

178181
final String parsedLhs;
179-
if (isFlatCollection) {
180-
// For flat collections, assume all arrays are native PostgreSQL arrays
182+
if (isFlatCollection && !isJsonbArray) {
183+
// For flat collections with native arrays, use direct column reference
181184
parsedLhs = postgresQueryParser.transformField(identifierName).getPgColumn();
182185
} else {
183-
// For nested collections, use JSONB path accessor
186+
// For nested collections OR JSONB arrays in flat collections, use JSONB path accessor
184187
// Convert 'elements' to planets->'elements' where planets could be an alias for an upper
185188
// level array filter
186189
// For the first time (if 'elements' was not under any nested array, say a top-level field),
187-
// use the field identifier visitor to make it document->'elements'
190+
// use the field identifier visitor to make it document->'elements' or props->'colors'
188191
final PostgresIdentifierExpressionVisitor identifierVisitor =
189192
new PostgresIdentifierExpressionVisitor(postgresQueryParser);
190193
final PostgresSelectTypeExpressionVisitor arrayPathVisitor =
@@ -206,18 +209,18 @@ private String getFilterStringForAnyOperator(final ArrayRelationalFilterExpressi
206209
.getFilter()
207210
.accept(new PostgresFilterTypeExpressionVisitor(postgresQueryParser, visitorProvider));
208211

209-
if (isFlatCollection) {
212+
if (isFlatCollection && !isJsonbArray) {
210213
// todo: For array filters, UNNEST is not the most optimal way as it won't use the index.
211214
// Perhaps, we should use ANY or @> ARRAY operator
212215

213-
// For flat collections, assume all arrays are native and use unnest()
216+
// For flat collections with native arrays (e.g., tags), use unnest()
214217
// Infer array type from filter to properly cast empty array
215218
String arrayTypeCast = inferArrayTypeCastFromFilter(expression.getFilter());
216219
return String.format(
217220
"EXISTS (SELECT 1 FROM unnest(COALESCE(%s, ARRAY[]%s)) AS \"%s\" WHERE %s)",
218221
parsedLhs, arrayTypeCast, alias, parsedFilter);
219222
} else {
220-
// For nested collections with JSONB arrays, use jsonb_array_elements()
223+
// For nested collections OR JSONB arrays in flat collections, use jsonb_array_elements()
221224
return String.format(
222225
"EXISTS (SELECT 1 FROM jsonb_array_elements(COALESCE(%s, '[]'::jsonb)) AS \"%s\" WHERE %s)",
223226
parsedLhs, alias, parsedFilter);
@@ -284,18 +287,20 @@ private String getFilterStringForAnyOperator(final DocumentArrayFilterExpression
284287
boolean isFlatCollection =
285288
postgresQueryParser.getPgColTransformer().getDocumentType() == DocumentType.FLAT;
286289

290+
boolean isJsonbArray = expression.getArraySource() instanceof JsonIdentifierExpression;
291+
287292
// Extract the field name
288293
final String identifierName =
289294
expression
290295
.getArraySource()
291296
.accept(new PostgresIdentifierExpressionVisitor(postgresQueryParser));
292297

293298
final String parsedLhs;
294-
if (isFlatCollection) {
295-
// For flat collections, assume all arrays are native PostgreSQL arrays
296-
// Use direct column reference with double quotes
299+
if (isFlatCollection && !isJsonbArray) {
300+
// For flat collections with native arrays, use direct column reference with double quotes
297301
parsedLhs = postgresQueryParser.transformField(identifierName).getPgColumn();
298302
} else {
303+
// For nested collections OR JSONB arrays in flat collections, use JSONB path accessor
299304
final PostgresIdentifierExpressionVisitor identifierVisitor =
300305
new PostgresIdentifierExpressionVisitor(postgresQueryParser);
301306
final PostgresSelectTypeExpressionVisitor arrayPathVisitor =
@@ -316,16 +321,16 @@ private String getFilterStringForAnyOperator(final DocumentArrayFilterExpression
316321
.getFilter()
317322
.accept(new PostgresFilterTypeExpressionVisitor(postgresQueryParser, wrapper));
318323

319-
if (isFlatCollection) {
320-
// For flat collections, assume all arrays are native and use unnest()
324+
if (isFlatCollection && !isJsonbArray) {
325+
// For flat collections with native arrays, use unnest()
321326
// Note: DocumentArrayFilterExpression typically works with JSONB arrays containing objects
322327
// For simplicity, we default to text[] type cast, though this may need refinement
323328
String arrayTypeCast = "::text[]";
324329
return String.format(
325330
"EXISTS (SELECT 1 FROM unnest(COALESCE(%s, ARRAY[]%s)) AS \"%s\" WHERE %s)",
326331
parsedLhs, arrayTypeCast, alias, parsedFilter);
327332
} else {
328-
// For nested collections with JSONB arrays, use jsonb_array_elements()
333+
// For nested collections OR JSONB arrays in flat collections, use jsonb_array_elements()
329334
return String.format(
330335
"EXISTS (SELECT 1 FROM jsonb_array_elements(COALESCE(%s, '[]'::jsonb)) AS \"%s\" WHERE %s)",
331336
parsedLhs, alias, parsedFilter);

document-store/src/main/java/org/hypertrace/core/documentstore/postgres/query/v1/vistors/PostgresFromTypeExpressionVisitor.java

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import java.util.stream.Collectors;
55
import lombok.Getter;
66
import org.hypertrace.core.documentstore.DocumentType;
7+
import org.hypertrace.core.documentstore.expression.impl.JsonIdentifierExpression;
78
import org.hypertrace.core.documentstore.expression.impl.SubQueryJoinExpression;
89
import org.hypertrace.core.documentstore.expression.impl.UnnestExpression;
910
import org.hypertrace.core.documentstore.parser.FromTypeExpressionVisitor;
@@ -47,11 +48,14 @@ public String visit(UnnestExpression unnestExpression) {
4748
boolean isFlatCollection =
4849
postgresQueryParser.getPgColTransformer().getDocumentType() == DocumentType.FLAT;
4950

51+
boolean isJsonbArray =
52+
unnestExpression.getIdentifierExpression() instanceof JsonIdentifierExpression;
53+
5054
String transformedFieldName;
5155
String unnestFunction;
5256

53-
if (isFlatCollection) {
54-
// For flat collections, assume all unnested fields are native PostgreSQL arrays
57+
if (isFlatCollection && !isJsonbArray) {
58+
// For flat collections with native arrays (e.g., tags), use unnest()
5559
// Use the transformer to get the proper column name (handles quotes and naming)
5660
transformedFieldName = postgresQueryParser.transformField(orgFieldName).getPgColumn();
5761
// Use native unnest() for PostgreSQL array columns
@@ -60,7 +64,7 @@ public String visit(UnnestExpression unnestExpression) {
6064
// e.g., unnest("tags") p1(tags_unnested) instead of p1(tags)
6165
pgColumnName = pgColumnName + "_unnested";
6266
} else {
63-
// For nested collections, use JSONB path accessor
67+
// For nested collections OR JSONB arrays in flat collections, use jsonb_array_elements()
6468
transformedFieldName =
6569
unnestExpression
6670
.getIdentifierExpression()
@@ -78,8 +82,12 @@ public String visit(UnnestExpression unnestExpression) {
7882
String tableAlias = "t" + preIndex;
7983
String unwindExpr = String.format(unnestFunction, transformedFieldName);
8084

85+
// Quote the column name only for native array columns in flat collections, so PG does not fold it to lower case
8186
String unwindExprAlias =
82-
String.format(UNWIND_EXP_ALIAS_FMT, nextIndex, getColName(isFlatCollection, pgColumnName));
87+
String.format(
88+
UNWIND_EXP_ALIAS_FMT,
89+
nextIndex,
90+
getColName(isFlatCollection && !isJsonbArray, pgColumnName));
8391

8492
String fmt =
8593
unnestExpression.isPreserveNullAndEmptyArrays()
@@ -144,9 +152,7 @@ private static String prepareTable0Query(PostgresQueryParser postgresQueryParser
144152
/*
145153
Returns the column name wrapped in double quotes when shouldQuote is true, to prevent folding to lower-case by PG; otherwise returns it unchanged
146154
*/
147-
private String getColName(boolean isFlatCollection, String pgColumnName) {
148-
return isFlatCollection
149-
? PostgresUtils.wrapFieldNamesWithDoubleQuotes(pgColumnName)
150-
: pgColumnName;
155+
private String getColName(boolean shouldQuote, String pgColumnName) {
156+
return shouldQuote ? PostgresUtils.wrapFieldNamesWithDoubleQuotes(pgColumnName) : pgColumnName;
151157
}
152158
}

0 commit comments

Comments
 (0)