Skip to content

Commit 35fac78

Browse files
authored
Use ARRAY instead of IN queries for Better Perf in Flat PG Collections (#258)
1 parent 4e8e31c commit 35fac78

File tree

14 files changed

+769
-140
lines changed

14 files changed

+769
-140
lines changed

document-store/src/integrationTest/java/org/hypertrace/core/documentstore/DocStoreQueryV1Test.java

Lines changed: 348 additions & 12 deletions
Large diffs are not rendered by default.

document-store/src/main/java/org/hypertrace/core/documentstore/postgres/Params.java

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import java.util.HashMap;
44
import java.util.Map;
5+
import lombok.Getter;
56

67
/**
78
* Holds the params that need to be set in the PreparedStatement for constructing the final SQL
@@ -29,6 +30,18 @@ public static Builder newBuilder() {
2930
return new Builder();
3031
}
3132

33+
/** Wrapper class to hold array parameter metadata for PostgreSQL array binding */
34+
@Getter
35+
public static class ArrayParam {
36+
private final Object[] values;
37+
private final String sqlType;
38+
39+
public ArrayParam(Object[] values, String sqlType) {
40+
this.values = values;
41+
this.sqlType = sqlType;
42+
}
43+
}
44+
3245
public static class Builder {
3346

3447
private int nextIndex;
@@ -44,6 +57,11 @@ public Builder addObjectParam(Object paramValue) {
4457
return this;
4558
}
4659

60+
public Builder addArrayParam(Object[] values, String sqlType) {
61+
objectParams.put(nextIndex++, new ArrayParam(values, sqlType));
62+
return this;
63+
}
64+
4765
public Params build() {
4866
return new Params(objectParams);
4967
}

document-store/src/main/java/org/hypertrace/core/documentstore/postgres/query/v1/parser/filter/PostgresInRelationalFilterParserJsonArray.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ public String parse(
6161

6262
/**
6363
* Generates SQL for scalar IN operator (used when JSONB array field has been unnested). Example:
64-
* "props_dot_source-loc" IN (?::jsonb, ?::jsonb)
64+
* "props_dot_source-loc" = ANY(ARRAY[?::jsonb, ?::jsonb])
6565
*
6666
* <p>Note: After unnesting with jsonb_array_elements(), each row contains a JSONB scalar value.
6767
* We cast the parameters to jsonb for direct JSONB-to-JSONB comparison, which works for all JSONB
@@ -86,7 +86,7 @@ private String prepareFilterStringForScalarInOperator(
8686
.collect(Collectors.joining(", "));
8787

8888
// Direct JSONB comparison - no text conversion needed
89-
return String.format("%s IN (%s)", parsedLhs, placeholders);
89+
return String.format("%s = ANY(ARRAY[%s])", parsedLhs, placeholders);
9090
}
9191

9292
/**

document-store/src/main/java/org/hypertrace/core/documentstore/postgres/query/v1/parser/filter/PostgresInRelationalFilterParserJsonPrimitive.java

Lines changed: 26 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
package org.hypertrace.core.documentstore.postgres.query.v1.parser.filter;
22

3-
import java.util.stream.Collectors;
43
import java.util.stream.StreamSupport;
54
import org.hypertrace.core.documentstore.expression.impl.JsonFieldType;
65
import org.hypertrace.core.documentstore.expression.impl.JsonIdentifierExpression;
@@ -11,12 +10,13 @@
1110
* Optimized parser for IN operations on JSON primitive fields (string, number, boolean) with proper
1211
* type casting.
1312
*
14-
* <p>Generates efficient SQL using {@code ->>} operator with appropriate PostgreSQL casting:
13+
* <p>Generates efficient SQL using {@code ->>} operator with {@code = ANY(ARRAY[])} and appropriate
14+
* PostgreSQL casting:
1515
*
1616
* <ul>
17-
* <li><b>STRING:</b> {@code "document" ->> 'item' IN ('Soap', 'Shampoo')}
18-
* <li><b>NUMBER:</b> {@code CAST("document" ->> 'price' AS NUMERIC) IN (10, 20)}
19-
* <li><b>BOOLEAN:</b> {@code CAST("document" ->> 'active' AS BOOLEAN) IN (true, false)}
17+
* <li><b>STRING:</b> {@code "document" ->> 'item' = ANY(ARRAY['Soap', 'Shampoo'])}
18+
* <li><b>NUMBER:</b> {@code CAST("document" ->> 'price' AS NUMERIC) = ANY(ARRAY[10, 20])}
19+
* <li><b>BOOLEAN:</b> {@code CAST("document" ->> 'active' AS BOOLEAN) = ANY(ARRAY[true, false])}
2020
* </ul>
2121
*
2222
* <p>This is much more efficient than the defensive approach that checks both array and scalar
@@ -61,14 +61,15 @@ private String prepareFilterStringForInOperator(
6161
final JsonFieldType fieldType,
6262
final Params.Builder paramsBuilder) {
6363

64-
String placeholders =
65-
StreamSupport.stream(parsedRhs.spliterator(), false)
66-
.map(
67-
value -> {
68-
paramsBuilder.addObjectParam(value);
69-
return "?";
70-
})
71-
.collect(Collectors.joining(", "));
64+
Object[] values = StreamSupport.stream(parsedRhs.spliterator(), false).toArray();
65+
66+
if (values.length == 0) {
67+
// return FALSE
68+
return "1 = 0";
69+
}
70+
71+
String sqlType = mapJsonFieldTypeToSqlType(fieldType);
72+
paramsBuilder.addArrayParam(values, sqlType);
7273

7374
// Apply appropriate casting based on field type
7475
String lhsWithCast = parsedLhs;
@@ -77,8 +78,18 @@ private String prepareFilterStringForInOperator(
7778
} else if (fieldType == JsonFieldType.BOOLEAN) {
7879
lhsWithCast = String.format("CAST(%s AS BOOLEAN)", parsedLhs);
7980
}
80-
// STRING or null fieldType: no casting needed
81+
return String.format("%s = ANY(?)", lhsWithCast);
82+
}
8183

82-
return String.format("%s IN (%s)", lhsWithCast, placeholders);
84+
private String mapJsonFieldTypeToSqlType(JsonFieldType fieldType) {
85+
switch (fieldType) {
86+
case NUMBER:
87+
return "float8";
88+
case BOOLEAN:
89+
return "bool";
90+
case STRING:
91+
default:
92+
return "text";
93+
}
8394
}
8495
}

document-store/src/main/java/org/hypertrace/core/documentstore/postgres/query/v1/parser/filter/PostgresTopLevelArrayEqualityFilterParser.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
import java.util.stream.StreamSupport;
66
import org.hypertrace.core.documentstore.expression.impl.ArrayIdentifierExpression;
77
import org.hypertrace.core.documentstore.expression.impl.RelationalExpression;
8-
import org.hypertrace.core.documentstore.postgres.query.v1.parser.filter.nonjson.field.PostgresArrayTypeExtractor;
8+
import org.hypertrace.core.documentstore.postgres.query.v1.parser.filter.nonjson.field.PostgresTypeExtractor;
99

1010
/**
1111
* Handles EQ/NEQ operations on top-level array columns when RHS is also an array, using exact
@@ -43,7 +43,7 @@ public String parse(
4343
.collect(Collectors.joining(", "));
4444

4545
ArrayIdentifierExpression arrayExpr = (ArrayIdentifierExpression) expression.getLhs();
46-
String arrayTypeCast = arrayExpr.accept(new PostgresArrayTypeExtractor());
46+
String arrayTypeCast = arrayExpr.accept(PostgresTypeExtractor.arrayType());
4747

4848
// Generate: tags = ARRAY[?, ?]::text[]
4949
if (arrayTypeCast != null) {

document-store/src/main/java/org/hypertrace/core/documentstore/postgres/query/v1/parser/filter/nonjson/field/PostgresContainsRelationalFilterParserNonJsonField.java

Lines changed: 4 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -50,18 +50,11 @@ public String parse(
5050
}
5151

5252
// Field is NOT unnested - use array containment operator
53-
String arrayTypeCast = expression.getLhs().accept(new PostgresArrayTypeExtractor());
53+
String arrayType = expression.getLhs().accept(PostgresTypeExtractor.arrayType());
54+
// Fallback to text[] if type is unknown
55+
String typeCast = (arrayType != null) ? arrayType : "text[]";
5456

55-
// Use ARRAY[?, ?, ...] syntax with appropriate type cast
56-
if (arrayTypeCast != null && arrayTypeCast.equals("text[]")) {
57-
return String.format("%s @> ARRAY[%s]::text[]", parsedLhs, placeholders);
58-
} else if (arrayTypeCast != null) {
59-
// INTEGER/BOOLEAN/DOUBLE arrays: Use the correct type cast
60-
return String.format("%s @> ARRAY[%s]::%s", parsedLhs, placeholders, arrayTypeCast);
61-
} else {
62-
// Fallback: use text[] cast
63-
return String.format("%s @> ARRAY[%s]::text[]", parsedLhs, placeholders);
64-
}
57+
return String.format("%s @> ARRAY[%s]::%s", parsedLhs, placeholders, typeCast);
6558
}
6659

6760
private Iterable<Object> normalizeToIterable(final Object value) {

document-store/src/main/java/org/hypertrace/core/documentstore/postgres/query/v1/parser/filter/nonjson/field/PostgresDataType.java

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -5,30 +5,36 @@
55
/**
66
* PostgreSQL-specific data types with their SQL type strings.
77
*
8-
* <p>This enum maps generic {@link DataType} values to PostgreSQL-specific type strings used in SQL
9-
* queries for type casting.
8+
* <p>This enum maps generic {@link DataType} values to PostgreSQL internal type names, which work
9+
* for both JDBC's {@code Connection.createArrayOf()} and SQL type casting.
1010
*/
1111
public enum PostgresDataType {
1212
TEXT("text"),
13-
INTEGER("integer"),
14-
BIGINT("bigint"),
15-
REAL("real"),
16-
DOUBLE_PRECISION("double precision"),
17-
BOOLEAN("boolean"),
13+
INTEGER("int4"),
14+
BIGINT("int8"),
15+
REAL("float4"),
16+
DOUBLE_PRECISION("float8"),
17+
BOOLEAN("bool"),
1818
TIMESTAMPTZ("timestamptz"),
1919
DATE("date"),
20-
UNKNOWN("unknown");
20+
UNKNOWN(null);
2121

2222
private final String sqlType;
2323

2424
PostgresDataType(String sqlType) {
2525
this.sqlType = sqlType;
2626
}
2727

28+
/**
29+
* Returns the PostgreSQL type name for use with JDBC's createArrayOf() and SQL casting.
30+
*
31+
* @return The type name (e.g., "int4", "float8", "text")
32+
*/
2833
public String getSqlType() {
2934
return sqlType;
3035
}
3136

37+
/** Returns the array type for SQL casting (e.g., "int4[]", "text[]"). */
3238
public String getArraySqlType() {
3339
return sqlType + "[]";
3440
}
@@ -38,7 +44,6 @@ public String getArraySqlType() {
3844
*
3945
* @param dataType the generic data type
4046
* @return the corresponding PostgresDataType, or null if UNSPECIFIED
41-
* @throws IllegalArgumentException if the DataType is unknown
4247
*/
4348
public static PostgresDataType fromDataType(DataType dataType) {
4449
switch (dataType) {

document-store/src/main/java/org/hypertrace/core/documentstore/postgres/query/v1/parser/filter/nonjson/field/PostgresInRelationalFilterParserArrayField.java

Lines changed: 57 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -31,79 +31,101 @@ public String parse(
3131
final String parsedLhs = expression.getLhs().accept(context.lhsParser());
3232
final Iterable<Object> parsedRhs = expression.getRhs().accept(context.rhsParser());
3333

34+
// Extract element type from expression metadata for type-safe query generation
35+
String sqlType = expression.getLhs().accept(PostgresTypeExtractor.scalarType());
36+
3437
// Check if this field has been unnested - if so, treat it as a scalar
3538
ArrayIdentifierExpression arrayExpr = (ArrayIdentifierExpression) expression.getLhs();
3639
String fieldName = arrayExpr.getName();
3740
if (context.getPgColumnNames().containsKey(fieldName)) {
3841
// Field is unnested - each element is now a scalar, not an array
3942
// Use scalar IN operator instead of array overlap
4043
return prepareFilterStringForScalarInOperator(
41-
parsedLhs, parsedRhs, context.getParamsBuilder());
44+
parsedLhs, parsedRhs, sqlType, context.getParamsBuilder());
4245
}
4346

4447
// Field is NOT unnested - use array overlap logic
45-
String arrayTypeCast = expression.getLhs().accept(new PostgresArrayTypeExtractor());
4648
return prepareFilterStringForArrayInOperator(
47-
parsedLhs, parsedRhs, arrayTypeCast, context.getParamsBuilder());
49+
parsedLhs, parsedRhs, sqlType, context.getParamsBuilder());
4850
}
4951

5052
/**
5153
* Generates SQL for scalar IN operator (used when array field has been unnested). Example:
52-
* "tags_unnested" IN (?, ?, ?)
54+
* "tags_unnested" = ANY(?)
5355
*/
5456
private String prepareFilterStringForScalarInOperator(
5557
final String parsedLhs,
5658
final Iterable<Object> parsedRhs,
59+
final String sqlType,
5760
final Params.Builder paramsBuilder) {
61+
// If type is specified, use optimized ANY(ARRAY[]) syntax
62+
// Otherwise, fall back to traditional IN (?, ?, ?) for backward compatibility
63+
if (sqlType != null) {
64+
Object[] values = StreamSupport.stream(parsedRhs.spliterator(), false).toArray();
5865

59-
String placeholders =
60-
StreamSupport.stream(parsedRhs.spliterator(), false)
61-
.map(
62-
value -> {
63-
paramsBuilder.addObjectParam(value);
64-
return "?";
65-
})
66-
.collect(Collectors.joining(", "));
66+
if (values.length == 0) {
67+
return "1 = 0";
68+
}
6769

68-
// Scalar IN operator for unnested array elements
69-
return String.format("%s IN (%s)", parsedLhs, placeholders);
70+
paramsBuilder.addArrayParam(values, sqlType);
71+
return String.format("%s = ANY(?)", parsedLhs);
72+
} else {
73+
return prepareFilterStringFallback(parsedLhs, parsedRhs, paramsBuilder, "%s IN (%s)");
74+
}
7075
}
7176

7277
/**
7378
* Generates SQL for array overlap operator (used for non-unnested array fields). Example: "tags"
74-
* && ARRAY[?, ?]::text[]
79+
* && ?
80+
*
81+
* <p>Uses a single array parameter.
7582
*/
7683
private String prepareFilterStringForArrayInOperator(
7784
final String parsedLhs,
7885
final Iterable<Object> parsedRhs,
79-
final String arrayType,
86+
final String sqlType,
8087
final Params.Builder paramsBuilder) {
88+
// If type is specified, use optimized array overlap with typed array
89+
// Otherwise, fall back to jsonb-based approach for backward compatibility
90+
if (sqlType != null) {
91+
Object[] values = StreamSupport.stream(parsedRhs.spliterator(), false).toArray();
8192

82-
String placeholders =
93+
if (values.length == 0) {
94+
return "1 = 0";
95+
}
96+
97+
paramsBuilder.addArrayParam(values, sqlType);
98+
return String.format("%s && ?", parsedLhs);
99+
} else {
100+
// Fallback: cast both sides to text[] for backward compatibility with any array type
101+
return prepareFilterStringFallback(
102+
parsedLhs, parsedRhs, paramsBuilder, "%s::text[] && ARRAY[%s]::text[]");
103+
}
104+
}
105+
106+
/**
107+
* Fallback method using traditional (?, ?, ?) syntax for backward compatibility when type
108+
* information is not available.
109+
*/
110+
private String prepareFilterStringFallback(
111+
final String parsedLhs,
112+
final Iterable<Object> parsedRhs,
113+
final Params.Builder paramsBuilder,
114+
final String formatPattern) {
115+
116+
String collect =
83117
StreamSupport.stream(parsedRhs.spliterator(), false)
84118
.map(
85-
value -> {
86-
paramsBuilder.addObjectParam(value);
119+
val -> {
120+
paramsBuilder.addObjectParam(val);
87121
return "?";
88122
})
89123
.collect(Collectors.joining(", "));
90124

91-
// Use array overlap operator for array fields
92-
if (arrayType != null) {
93-
// Type-aware optimization
94-
if (arrayType.equals("text[]")) {
95-
// cast RHS to text[] otherwise JDBC binds it as character varying[].
96-
return String.format("%s && ARRAY[%s]::text[]", parsedLhs, placeholders);
97-
} else {
98-
// INTEGER/BOOLEAN arrays: No casting needed, JDBC binds them correctly
99-
// "numbers" && ARRAY[?, ?] (PostgreSQL infers integer[])
100-
// "flags" && ARRAY[?, ?] (PostgreSQL infers boolean[])
101-
return String.format("%s && ARRAY[%s]", parsedLhs, placeholders);
102-
}
103-
} else {
104-
// Fallback: Cast both LHS and RHS to text[] to avoid type mismatch issues. This has the worst
105-
// performance because casting LHS doesn't let PG use indexes on this col
106-
return String.format("%s::text[] && ARRAY[%s]::text[]", parsedLhs, placeholders);
125+
if (collect.isEmpty()) {
126+
return "1 = 0";
107127
}
128+
129+
return String.format(formatPattern, parsedLhs, collect);
108130
}
109131
}

0 commit comments

Comments
 (0)