Skip to content

Commit 4516200

Browse files
authored
Sync EQ/NEQ on Arrays in Flat Collections with Mongo (#257)
* Added test cases for the following when LHS is a JSON nested array: 1. CONTAINS scalar. 2. NOT_CONTAINS scalar. 3. CONTAINS array. 4. NOT_CONTAINS array.
1 parent df5c5f6 commit 4516200

File tree

11 files changed

+965
-282
lines changed

11 files changed

+965
-282
lines changed

document-store/src/integrationTest/java/org/hypertrace/core/documentstore/DocStoreQueryV1Test.java

Lines changed: 246 additions & 275 deletions
Large diffs are not rendered by default.

document-store/src/main/java/org/hypertrace/core/documentstore/postgres/query/v1/parser/builder/PostgresSelectExpressionParserBuilderImpl.java

Lines changed: 66 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,31 @@
11
package org.hypertrace.core.documentstore.postgres.query.v1.parser.builder;
22

3+
import static org.hypertrace.core.documentstore.expression.operators.RelationalOperator.EQ;
4+
import static org.hypertrace.core.documentstore.expression.operators.RelationalOperator.NEQ;
35
import static org.hypertrace.core.documentstore.postgres.utils.PostgresUtils.getType;
46

5-
import lombok.AllArgsConstructor;
7+
import org.hypertrace.core.documentstore.expression.impl.ArrayIdentifierExpression;
8+
import org.hypertrace.core.documentstore.expression.impl.ConstantExpression;
9+
import org.hypertrace.core.documentstore.expression.impl.JsonFieldType;
10+
import org.hypertrace.core.documentstore.expression.impl.JsonIdentifierExpression;
611
import org.hypertrace.core.documentstore.expression.impl.RelationalExpression;
12+
import org.hypertrace.core.documentstore.expression.type.SelectTypeExpression;
713
import org.hypertrace.core.documentstore.postgres.query.v1.PostgresQueryParser;
814
import org.hypertrace.core.documentstore.postgres.query.v1.vistors.PostgresConstantExpressionVisitor;
915
import org.hypertrace.core.documentstore.postgres.query.v1.vistors.PostgresDataAccessorIdentifierExpressionVisitor;
1016
import org.hypertrace.core.documentstore.postgres.query.v1.vistors.PostgresFieldIdentifierExpressionVisitor;
1117
import org.hypertrace.core.documentstore.postgres.query.v1.vistors.PostgresFunctionExpressionVisitor;
1218
import org.hypertrace.core.documentstore.postgres.query.v1.vistors.PostgresSelectTypeExpressionVisitor;
1319

14-
@AllArgsConstructor
1520
public class PostgresSelectExpressionParserBuilderImpl
1621
implements PostgresSelectExpressionParserBuilder {
1722

1823
private final PostgresQueryParser postgresQueryParser;
1924

25+
/**
 * Creates a builder bound to the given query parser.
 *
 * @param postgresQueryParser the parser handed to every identifier visitor this builder creates
 */
public PostgresSelectExpressionParserBuilderImpl(final PostgresQueryParser postgresQueryParser) {
  this.postgresQueryParser = postgresQueryParser;
}
28+
2029
@Override
2130
public PostgresSelectTypeExpressionVisitor build(final RelationalExpression expression) {
2231
switch (expression.getOperator()) {
@@ -29,11 +38,66 @@ public PostgresSelectTypeExpressionVisitor build(final RelationalExpression expr
2938
return new PostgresFunctionExpressionVisitor(
3039
new PostgresFieldIdentifierExpressionVisitor(this.postgresQueryParser));
3140

41+
case EQ:
42+
case NEQ:
43+
// For EQ/NEQ on array fields, treat like CONTAINS to use -> instead of ->>
44+
if (shouldSwitchToContainsFlow(expression)) {
45+
// Use field identifier (JSON accessor ->) for array fields
46+
return new PostgresFunctionExpressionVisitor(
47+
new PostgresFieldIdentifierExpressionVisitor(this.postgresQueryParser));
48+
}
49+
// Fall through to default for non-array fields
3250
default:
3351
return new PostgresFunctionExpressionVisitor(
3452
new PostgresDataAccessorIdentifierExpressionVisitor(
3553
this.postgresQueryParser,
3654
getType(expression.getRhs().accept(new PostgresConstantExpressionVisitor()))));
3755
}
3856
}
57+
58+
/**
59+
* Checks if this is an EQ/NEQ operator on an array field.
60+
*
61+
* <p>Only converts to CONTAINS when RHS is a scalar value. If RHS is an array, we want exact
62+
* equality match, not containment.
63+
*
64+
* <p>Handles both:
65+
*
66+
* <ul>
67+
* <li>{@link JsonIdentifierExpression} with array field type (JSONB arrays)
68+
* <li>{@link ArrayIdentifierExpression} with array type (top-level array columns)
69+
* </ul>
70+
*/
71+
private boolean shouldSwitchToContainsFlow(final RelationalExpression expression) {
72+
if (expression.getOperator() != EQ && expression.getOperator() != NEQ) {
73+
return false;
74+
}
75+
76+
// Check if RHS is an array/iterable - if so, don't convert (since we want an exact match for
77+
// such cases)
78+
if (expression.getRhs() instanceof ConstantExpression) {
79+
ConstantExpression constExpr = (ConstantExpression) expression.getRhs();
80+
if (constExpr.getValue() instanceof Iterable) {
81+
return false;
82+
}
83+
}
84+
85+
return isArrayField(expression.getLhs());
86+
}
87+
88+
private boolean isArrayField(final SelectTypeExpression lhs) {
89+
if (lhs instanceof JsonIdentifierExpression) {
90+
JsonIdentifierExpression jsonExpr = (JsonIdentifierExpression) lhs;
91+
return jsonExpr
92+
.getFieldType()
93+
.map(
94+
fieldType ->
95+
fieldType == JsonFieldType.BOOLEAN_ARRAY
96+
|| fieldType == JsonFieldType.STRING_ARRAY
97+
|| fieldType == JsonFieldType.NUMBER_ARRAY
98+
|| fieldType == JsonFieldType.OBJECT_ARRAY)
99+
.orElse(false);
100+
}
101+
return lhs instanceof ArrayIdentifierExpression;
102+
}
39103
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
package org.hypertrace.core.documentstore.postgres.query.v1.parser.filter;
2+
3+
import org.hypertrace.core.documentstore.expression.impl.AggregateExpression;
4+
import org.hypertrace.core.documentstore.expression.impl.AliasedIdentifierExpression;
5+
import org.hypertrace.core.documentstore.expression.impl.ArrayIdentifierExpression;
6+
import org.hypertrace.core.documentstore.expression.impl.ConstantExpression;
7+
import org.hypertrace.core.documentstore.expression.impl.ConstantExpression.DocumentConstantExpression;
8+
import org.hypertrace.core.documentstore.expression.impl.FunctionExpression;
9+
import org.hypertrace.core.documentstore.expression.impl.IdentifierExpression;
10+
import org.hypertrace.core.documentstore.expression.impl.JsonIdentifierExpression;
11+
import org.hypertrace.core.documentstore.parser.SelectTypeExpressionVisitor;
12+
13+
/**
14+
* Selects the appropriate array equality parser based on the LHS expression type.
15+
*
16+
* <p>For JsonIdentifierExpression: uses JSONB array equality parser
17+
*
18+
* <p>For ArrayIdentifierExpression: uses top-level array equality parser
19+
*/
20+
class PostgresArrayEqualityParserSelector implements SelectTypeExpressionVisitor {
21+
22+
private static final PostgresRelationalFilterParser jsonArrayEqualityParser =
23+
new PostgresJsonArrayEqualityFilterParser();
24+
private static final PostgresRelationalFilterParser topLevelArrayEqualityParser =
25+
new PostgresTopLevelArrayEqualityFilterParser();
26+
private static final PostgresRelationalFilterParser standardParser =
27+
new PostgresStandardRelationalFilterParser();
28+
29+
@Override
30+
public PostgresRelationalFilterParser visit(JsonIdentifierExpression expression) {
31+
return jsonArrayEqualityParser;
32+
}
33+
34+
@Override
35+
public PostgresRelationalFilterParser visit(ArrayIdentifierExpression expression) {
36+
return topLevelArrayEqualityParser;
37+
}
38+
39+
@Override
40+
public <T> T visit(IdentifierExpression expression) {
41+
return (T) standardParser;
42+
}
43+
44+
@Override
45+
public <T> T visit(AggregateExpression expression) {
46+
return (T) standardParser;
47+
}
48+
49+
@Override
50+
public <T> T visit(ConstantExpression expression) {
51+
return (T) standardParser;
52+
}
53+
54+
@Override
55+
public <T> T visit(DocumentConstantExpression expression) {
56+
return (T) standardParser;
57+
}
58+
59+
@Override
60+
public <T> T visit(FunctionExpression expression) {
61+
return (T) standardParser;
62+
}
63+
64+
@Override
65+
public <T> T visit(AliasedIdentifierExpression expression) {
66+
return (T) standardParser;
67+
}
68+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
package org.hypertrace.core.documentstore.postgres.query.v1.parser.filter;
2+
3+
import com.fasterxml.jackson.core.JsonProcessingException;
4+
import com.fasterxml.jackson.databind.ObjectMapper;
5+
import org.hypertrace.core.documentstore.expression.impl.RelationalExpression;
6+
7+
/**
8+
* Handles EQ/NEQ operations on JSONB array fields when RHS is also an array, using exact equality
9+
* (=) instead of containment (@>).
10+
*
11+
* <p>Generates: {@code props->'source-loc' = '["hygiene","family-pack"]'::jsonb}
12+
*/
13+
class PostgresJsonArrayEqualityFilterParser implements PostgresRelationalFilterParser {
14+
15+
private static final PostgresStandardRelationalOperatorMapper mapper =
16+
new PostgresStandardRelationalOperatorMapper();
17+
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
18+
19+
@Override
20+
public String parse(
21+
final RelationalExpression expression, final PostgresRelationalFilterContext context) {
22+
final String parsedLhs = expression.getLhs().accept(context.lhsParser());
23+
final Object parsedRhs = expression.getRhs().accept(context.rhsParser());
24+
final String operator = mapper.getMapping(expression.getOperator(), parsedRhs);
25+
26+
if (parsedRhs == null) {
27+
return String.format("%s %s NULL", parsedLhs, operator);
28+
}
29+
30+
// Convert the array to a JSONB string representation
31+
try {
32+
String jsonbValue;
33+
if (parsedRhs instanceof Iterable) {
34+
jsonbValue = OBJECT_MAPPER.writeValueAsString(parsedRhs);
35+
} else {
36+
jsonbValue = String.valueOf(parsedRhs);
37+
}
38+
context.getParamsBuilder().addObjectParam(jsonbValue);
39+
return String.format("%s %s ?::jsonb", parsedLhs, operator);
40+
} catch (JsonProcessingException e) {
41+
throw new RuntimeException("Failed to serialize RHS array to JSON", e);
42+
}
43+
}
44+
}

document-store/src/main/java/org/hypertrace/core/documentstore/postgres/query/v1/parser/filter/PostgresNotContainsRelationalFilterParser.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,7 @@ private boolean shouldUseJsonParser(
3535

3636
boolean isJsonField = expression.getLhs() instanceof JsonIdentifierExpression;
3737
boolean isFlatCollection = context.getPgColTransformer().getDocumentType() == DocumentType.FLAT;
38-
boolean useJsonParser = !isFlatCollection || isJsonField;
3938

40-
return useJsonParser;
39+
return !isFlatCollection || isJsonField;
4140
}
4241
}

document-store/src/main/java/org/hypertrace/core/documentstore/postgres/query/v1/parser/filter/PostgresRelationalFilterParserFactoryImpl.java

Lines changed: 137 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,11 @@
22

33
import static java.util.Map.entry;
44
import static org.hypertrace.core.documentstore.expression.operators.RelationalOperator.CONTAINS;
5+
import static org.hypertrace.core.documentstore.expression.operators.RelationalOperator.EQ;
56
import static org.hypertrace.core.documentstore.expression.operators.RelationalOperator.EXISTS;
67
import static org.hypertrace.core.documentstore.expression.operators.RelationalOperator.IN;
78
import static org.hypertrace.core.documentstore.expression.operators.RelationalOperator.LIKE;
9+
import static org.hypertrace.core.documentstore.expression.operators.RelationalOperator.NEQ;
810
import static org.hypertrace.core.documentstore.expression.operators.RelationalOperator.NOT_CONTAINS;
911
import static org.hypertrace.core.documentstore.expression.operators.RelationalOperator.NOT_EXISTS;
1012
import static org.hypertrace.core.documentstore.expression.operators.RelationalOperator.NOT_IN;
@@ -13,12 +15,18 @@
1315
import com.google.common.collect.Maps;
1416
import java.util.Map;
1517
import org.hypertrace.core.documentstore.DocumentType;
18+
import org.hypertrace.core.documentstore.expression.impl.ArrayIdentifierExpression;
19+
import org.hypertrace.core.documentstore.expression.impl.ConstantExpression;
20+
import org.hypertrace.core.documentstore.expression.impl.JsonFieldType;
21+
import org.hypertrace.core.documentstore.expression.impl.JsonIdentifierExpression;
1622
import org.hypertrace.core.documentstore.expression.impl.RelationalExpression;
1723
import org.hypertrace.core.documentstore.expression.operators.RelationalOperator;
24+
import org.hypertrace.core.documentstore.expression.type.SelectTypeExpression;
1825
import org.hypertrace.core.documentstore.postgres.query.v1.PostgresQueryParser;
1926

2027
public class PostgresRelationalFilterParserFactoryImpl
2128
implements PostgresRelationalFilterParserFactory {
29+
2230
private static final Map<RelationalOperator, PostgresRelationalFilterParser> parserMap =
2331
Maps.immutableEnumMap(
2432
Map.ofEntries(
@@ -41,12 +49,139 @@ public PostgresRelationalFilterParser parser(
4149
boolean isFlatCollection =
4250
postgresQueryParser.getPgColTransformer().getDocumentType() == DocumentType.FLAT;
4351

44-
if (expression.getOperator() == CONTAINS) {
52+
RelationalOperator operator = expression.getOperator();
53+
// Transform EQ/NEQ to CONTAINS/NOT_CONTAINS for array fields with scalar RHS
54+
// (but not for unnested fields, which are already scalar)
55+
if (shouldConvertEqToContains(expression, postgresQueryParser)) {
56+
operator = (expression.getOperator() == EQ) ? CONTAINS : NOT_CONTAINS;
57+
}
58+
59+
if (operator == CONTAINS) {
4560
return expression.getLhs().accept(new PostgresContainsParserSelector(isFlatCollection));
46-
} else if (expression.getOperator() == IN) {
61+
} else if (operator == IN) {
4762
return expression.getLhs().accept(new PostgresInParserSelector(isFlatCollection));
63+
} else if (operator == NOT_CONTAINS) {
64+
return parserMap.get(NOT_CONTAINS);
65+
}
66+
67+
// For EQ/NEQ on array fields with array RHS, use specialized array equality parser (exact match
68+
// instead of containment)
69+
if (shouldUseArrayEqualityParser(expression, postgresQueryParser)) {
70+
return expression.getLhs().accept(new PostgresArrayEqualityParserSelector());
4871
}
4972

5073
return parserMap.getOrDefault(expression.getOperator(), postgresStandardRelationalFilterParser);
5174
}
75+
76+
/**
77+
* Determines if EQ/NEQ should be converted to CONTAINS/NOT_CONTAINS.
78+
*
79+
* <p>Conversion happens when:
80+
*
81+
* <ul>
82+
* <li>Operator is EQ or NEQ
83+
* <li>RHS is a SCALAR value (not an array/iterable)
84+
* <li>LHS is a JsonIdentifierExpression with an array field type (STRING_ARRAY, NUMBER_ARRAY,
85+
* etc.) OR
86+
* <li>LHS is an ArrayIdentifierExpression with an array type (TEXT, BIGINT, etc.)
87+
* <li>Field has NOT been unnested (unnested fields are scalar, not arrays)
88+
* </ul>
89+
*
90+
* <p>If RHS is an array, we DO NOT convert - we want exact equality match (= operator), not
91+
* containment (@> operator).
92+
*
93+
* <p>This provides semantic equivalence: checking if an array contains a scalar value is more
94+
* intuitive than checking if the array equals the value.
95+
*/
96+
private boolean shouldConvertEqToContains(
97+
final RelationalExpression expression, final PostgresQueryParser postgresQueryParser) {
98+
if (expression.getOperator() != EQ && expression.getOperator() != NEQ) {
99+
return false;
100+
}
101+
102+
// Check if RHS is an array/iterable - if so, don't convert (we want exact match)
103+
if (isArrayRhs(expression.getRhs())) {
104+
return false;
105+
}
106+
107+
// Check if LHS is an array field
108+
if (!isArrayField(expression.getLhs())) {
109+
return false;
110+
}
111+
112+
// Check if field has been unnested - unnested fields are scalar, not arrays
113+
String fieldName = getFieldName(expression.getLhs());
114+
return fieldName == null
115+
|| !postgresQueryParser
116+
.getPgColumnNames()
117+
.containsKey(fieldName); // Field is unnested - treat as scalar
118+
}
119+
120+
/**
121+
* Determines if we should use the specialized array equality parser.
122+
*
123+
* <p>Use this parser when:
124+
*
125+
* <ul>
126+
* <li>Operator is EQ or NEQ
127+
* <li>RHS is an array/iterable (for exact match).
128+
* <li>LHS is either {@link JsonIdentifierExpression} with array type OR {@link
129+
* ArrayIdentifierExpression}
130+
* <li>Field has NOT been unnested (unnested fields are scalar, not arrays)
131+
* </ul>
132+
*/
133+
private boolean shouldUseArrayEqualityParser(
134+
final RelationalExpression expression, final PostgresQueryParser postgresQueryParser) {
135+
if (expression.getOperator() != EQ && expression.getOperator() != NEQ) {
136+
return false;
137+
}
138+
139+
// Check if RHS is an array/iterable AND LHS is an array field
140+
if (!isArrayRhs(expression.getRhs()) || !isArrayField(expression.getLhs())) {
141+
return false;
142+
}
143+
144+
// Check if field has been unnested - unnested fields are scalar, not arrays
145+
String fieldName = getFieldName(expression.getLhs());
146+
return fieldName == null || !postgresQueryParser.getPgColumnNames().containsKey(fieldName);
147+
}
148+
149+
/**
150+
* Checks if the RHS expression contains an array/iterable value. Currently, we don't have a very
151+
* clean way to get the RHS data type. //todo: Implement a clean way to get the RHS data type
152+
*/
153+
private boolean isArrayRhs(final SelectTypeExpression rhs) {
154+
if (rhs instanceof ConstantExpression) {
155+
ConstantExpression constExpr = (ConstantExpression) rhs;
156+
return constExpr.getValue() instanceof Iterable;
157+
}
158+
return false;
159+
}
160+
161+
/** Checks if the LHS expression is an array field. */
162+
private boolean isArrayField(final SelectTypeExpression lhs) {
163+
if (lhs instanceof JsonIdentifierExpression) {
164+
JsonIdentifierExpression jsonExpr = (JsonIdentifierExpression) lhs;
165+
return jsonExpr
166+
.getFieldType()
167+
.map(
168+
fieldType ->
169+
fieldType == JsonFieldType.BOOLEAN_ARRAY
170+
|| fieldType == JsonFieldType.STRING_ARRAY
171+
|| fieldType == JsonFieldType.NUMBER_ARRAY
172+
|| fieldType == JsonFieldType.OBJECT_ARRAY)
173+
.orElse(false);
174+
}
175+
return lhs instanceof ArrayIdentifierExpression;
176+
}
177+
178+
/** Extracts the field name from an identifier expression. */
179+
private String getFieldName(final SelectTypeExpression lhs) {
180+
if (lhs instanceof JsonIdentifierExpression) {
181+
return ((JsonIdentifierExpression) lhs).getName();
182+
} else if (lhs instanceof ArrayIdentifierExpression) {
183+
return ((ArrayIdentifierExpression) lhs).getName();
184+
}
185+
return null;
186+
}
52187
}

0 commit comments

Comments
 (0)