Skip to content
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,9 @@
import org.hypertrace.core.documentstore.commons.DocStoreConstants;
import org.hypertrace.core.documentstore.expression.impl.AggregateExpression;
import org.hypertrace.core.documentstore.expression.impl.AliasedIdentifierExpression;
import org.hypertrace.core.documentstore.expression.impl.ArrayIdentifierExpression;
import org.hypertrace.core.documentstore.expression.impl.ArrayRelationalFilterExpression;
import org.hypertrace.core.documentstore.expression.impl.ArrayType;
import org.hypertrace.core.documentstore.expression.impl.ConstantExpression;
import org.hypertrace.core.documentstore.expression.impl.FunctionExpression;
import org.hypertrace.core.documentstore.expression.impl.IdentifierExpression;
Expand Down Expand Up @@ -300,6 +302,24 @@ public Stream<Arguments> provideArguments(final ExtensionContext context) {
}
}

/**
 * Supplies the argument tuples for array-operator tests that run against Postgres only.
 *
 * <p>Each tuple is {@code (datastoreName, expressionType)}, where {@code expressionType} is one
 * of:
 *
 * <ul>
 *   <li>{@code "WITH_TYPE"}: ArrayIdentifierExpression WITH ArrayType (optimized, type-aware
 *       casting)
 *   <li>{@code "WITHOUT_TYPE"}: ArrayIdentifierExpression WITHOUT ArrayType (fallback, text[]
 *       casting)
 * </ul>
 */
private static class PostgresArrayTypeProvider implements ArgumentsProvider {

  @Override
  public Stream<Arguments> provideArguments(final ExtensionContext context) {
    // ArrayIdentifierExpression WITH ArrayType
    final Arguments typedArrayCase = Arguments.of(POSTGRES_STORE, "WITH_TYPE");
    // ArrayIdentifierExpression WITHOUT ArrayType
    final Arguments untypedArrayCase = Arguments.of(POSTGRES_STORE, "WITHOUT_TYPE");
    return Stream.of(typedArrayCase, untypedArrayCase);
  }
}

@ParameterizedTest
@ArgumentsSource(AllProvider.class)
public void testFindAll(String dataStoreName) throws IOException {
Expand Down Expand Up @@ -3267,6 +3287,228 @@ void testFlatPostgresCollectionCount(String dataStoreName) {
assertEquals(3, soapCountQuery);
}

/**
* Tests IN and NOT_IN operators on primitive (non-JSON) fields in flat collections. These
* operators should use simple SQL IN clause instead of array overlap operator for optimal index
* usage.
*
* <p>Every expected count below refers to the rows preloaded into the flat collection; the
* comments on each case list the row IDs that are expected to match.
*/
@ParameterizedTest
@ArgumentsSource(PostgresProvider.class)
void testFlatPostgresCollectionInAndNotInOperators(String dataStoreName) {
Datastore datastore = datastoreMap.get(dataStoreName);
Collection flatCollection =
datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT);

// Test 1: IN operator on _id field
// Expected: 3 documents (IDs 1, 3, 5)
Query idInQuery =
Query.builder()
.setFilter(
RelationalExpression.of(
IdentifierExpression.of("_id"),
IN,
ConstantExpression.ofNumbers(List.of(1, 3, 5))))
.build();

long idInCount = flatCollection.count(idInQuery);
assertEquals(3, idInCount, "IN operator on _id should find 3 documents");

// Test 2: IN operator on item field (string)
// Expected: 5 documents (IDs 3, 4 for Shampoo and 1, 5, 8 for Soap)
Query itemInQuery =
Query.builder()
.setFilter(
RelationalExpression.of(
IdentifierExpression.of("item"),
IN,
ConstantExpression.ofStrings(List.of("Soap", "Shampoo"))))
.build();

long itemInCount = flatCollection.count(itemInQuery);
assertEquals(
5, itemInCount, "IN operator on item should find 5 documents (3 Soap + 2 Shampoo)");

// Test 3: IN operator on price field (numeric)
// Expected: 4 documents (IDs 1, 8 for price=10 and 3, 4 for price=5)
Query priceInQuery =
Query.builder()
.setFilter(
RelationalExpression.of(
IdentifierExpression.of("price"),
IN,
ConstantExpression.ofNumbers(List.of(5, 10))))
.build();

long priceInCount = flatCollection.count(priceInQuery);
assertEquals(4, priceInCount, "IN operator on price should find 4 documents");

// Test 4: NOT_IN operator on _id field
// Expected: 7 documents (all except IDs 1, 3, 5)
Query idNotInQuery =
Query.builder()
.setFilter(
RelationalExpression.of(
IdentifierExpression.of("_id"),
NOT_IN,
ConstantExpression.ofNumbers(List.of(1, 3, 5))))
.build();

long idNotInCount = flatCollection.count(idNotInQuery);
assertEquals(7, idNotInCount, "NOT_IN operator on _id should find 7 documents");

// Test 5: NOT_IN operator on item field
// Expected: 7 documents (all except Soap items: IDs 2, 3, 4, 6, 7, 9, 10)
Query itemNotInQuery =
Query.builder()
.setFilter(
RelationalExpression.of(
IdentifierExpression.of("item"),
NOT_IN,
ConstantExpression.ofStrings(List.of("Soap"))))
.build();

long itemNotInCount = flatCollection.count(itemNotInQuery);
assertEquals(7, itemNotInCount, "NOT_IN operator on item should find 7 documents");

// Test 6: Combined IN with other filters (AND)
// Filter: _id IN (1, 3, 5, 7) AND price >= 10
// Expected: 2 documents (ID 1 with price=10, ID 5 with price=20)
Query combinedQuery =
Query.builder()
.setFilter(
LogicalExpression.builder()
.operator(LogicalOperator.AND)
.operand(
RelationalExpression.of(
IdentifierExpression.of("_id"),
IN,
ConstantExpression.ofNumbers(List.of(1, 3, 5, 7))))
.operand(
RelationalExpression.of(
IdentifierExpression.of("price"), GTE, ConstantExpression.of(10)))
.build())
.build();

long combinedCount = flatCollection.count(combinedQuery);
assertEquals(2, combinedCount, "Combined IN with >= filter should find 2 documents");
}

/**
 * Tests IN and NOT_IN operators on array fields in flat collections. Array fields use the
 * PostgreSQL array overlap operator ({@code &&}) for IN operations, which checks if the array
 * contains ANY of the provided values.
 *
 * <p>The test runs once per expression type supplied by {@code PostgresArrayTypeProvider}:
 *
 * <ol>
 *   <li>{@code "WITH_TYPE"}: ArrayIdentifierExpression WITH ArrayType - optimized queries with
 *       type-aware casting
 *   <li>{@code "WITHOUT_TYPE"}: ArrayIdentifierExpression WITHOUT ArrayType - fallback with
 *       text[] casting both sides
 * </ol>
 *
 * <p>A plain IdentifierExpression on an array field (backward compatibility) is NOT exercised
 * here, because the provider does not supply a case for it.
 *
 * @param dataStoreName key of the datastore under test in {@code datastoreMap}
 * @param expressionType {@code "WITH_TYPE"} to build typed array identifiers; any other value
 *     builds untyped ones
 */
@ParameterizedTest
@ArgumentsSource(PostgresArrayTypeProvider.class)
void testFlatPostgresCollectionInAndNotInOperatorsForArrays(
    String dataStoreName, String expressionType) {
  Datastore datastore = datastoreMap.get(dataStoreName);
  Collection flatCollection =
      datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT);

  // Decide the scenario once; constant-first equals also tolerates a null argument.
  final boolean withType = "WITH_TYPE".equals(expressionType);
  final String typeDesc =
      withType ? "WITH ArrayType (optimized)" : "WITHOUT ArrayType (fallback)";

  // Array identifiers for the scenario under test, built once and shared by the queries below.
  final ArrayIdentifierExpression tagsField =
      withType
          ? ArrayIdentifierExpression.of("tags", ArrayType.TEXT)
          : ArrayIdentifierExpression.of("tags");
  final ArrayIdentifierExpression numbersField =
      withType
          ? ArrayIdentifierExpression.of("numbers", ArrayType.INTEGER)
          : ArrayIdentifierExpression.of("numbers");

  // Test 1: IN operator on tags array field (string array)
  // Find documents where tags contains "hygiene" OR "grooming"
  // Expected: IDs 1, 5, 8 (hygiene) + IDs 6, 7 (grooming) = 5 documents
  Query tagsInQuery =
      Query.builder()
          .setFilter(
              RelationalExpression.of(
                  tagsField, IN, ConstantExpression.ofStrings(List.of("hygiene", "grooming"))))
          .build();

  long tagsInCount = flatCollection.count(tagsInQuery);
  assertEquals(
      5,
      tagsInCount,
      String.format(
          "IN operator on tags array %s should find 5 documents with hygiene or grooming",
          typeDesc));

  // Test 2: IN operator on numbers array field (numeric array)
  // Find documents where numbers array contains 1 OR 10
  // Expected: ID 1 has {1,2,3}, ID 2 has {10,20} = 2 documents
  Query numbersInQuery =
      Query.builder()
          .setFilter(
              RelationalExpression.of(
                  numbersField, IN, ConstantExpression.ofNumbers(List.of(1, 10))))
          .build();

  long numbersInCount = flatCollection.count(numbersInQuery);
  assertEquals(
      2,
      numbersInCount,
      String.format("IN operator on numbers array %s should find 2 documents", typeDesc));

  // Test 3: NOT_IN operator on tags array field
  // Find documents where tags does NOT contain "hygiene"
  // Expected: All documents except IDs 1, 5, 8 = 7 documents
  // Note: This includes NULL tags (ID 9) and empty array (ID 10)
  Query tagsNotInQuery =
      Query.builder()
          .setFilter(
              RelationalExpression.of(
                  tagsField, NOT_IN, ConstantExpression.ofStrings(List.of("hygiene"))))
          .build();

  long tagsNotInCount = flatCollection.count(tagsNotInQuery);
  assertEquals(
      7,
      tagsNotInCount,
      String.format(
          "NOT_IN operator on tags array %s should find 7 documents without hygiene",
          typeDesc));

  // Test 4: Combined array IN with scalar filter
  // Find documents where tags contains "premium" AND price >= 5
  // Expected: ID 1 (premium, price=10) + ID 3 (premium, price=5) = 2 documents
  Query combinedArrayQuery =
      Query.builder()
          .setFilter(
              LogicalExpression.builder()
                  .operator(LogicalOperator.AND)
                  .operand(
                      RelationalExpression.of(
                          tagsField, IN, ConstantExpression.ofStrings(List.of("premium"))))
                  .operand(
                      RelationalExpression.of(
                          IdentifierExpression.of("price"), GTE, ConstantExpression.of(5)))
                  .build())
          .build();

  long combinedArrayCount = flatCollection.count(combinedArrayQuery);
  assertEquals(
      2,
      combinedArrayCount,
      String.format("Combined array IN with >= filter %s should find 2 documents", typeDesc));
}

/**
* This test is disabled for now because flat collections do not support search on nested
* queries in JSONB fields (ex. props.brand)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
package org.hypertrace.core.documentstore.expression.impl;

import java.util.Optional;
import lombok.EqualsAndHashCode;
import org.hypertrace.core.documentstore.parser.SelectTypeExpressionVisitor;

/**
* Represents an identifier expression for array-typed fields. This allows parsers to apply
Expand All @@ -12,11 +14,36 @@
@EqualsAndHashCode(callSuper = true)
public class ArrayIdentifierExpression extends IdentifierExpression {

  /** Declared array type of the field, or {@code null} when the caller did not supply one. */
  private final ArrayType arrayType;

  /**
   * Creates an array identifier with no declared array type.
   *
   * @param name name of the array field
   */
  public ArrayIdentifierExpression(String name) {
    super(name);
    this.arrayType = null;
  }

  /**
   * Creates an array identifier with an optional declared array type.
   *
   * @param name name of the array field
   * @param arrayType declared array type; may be {@code null} to leave it unspecified
   */
  public ArrayIdentifierExpression(String name, ArrayType arrayType) {
    super(name);
    this.arrayType = arrayType;
  }

  /**
   * Static factory for an array identifier with no declared array type.
   *
   * @param name name of the array field
   * @return a new expression whose {@link #getArrayType()} is empty
   */
  public static ArrayIdentifierExpression of(String name) {
    return new ArrayIdentifierExpression(name, null);
  }

  /**
   * Static factory for an array identifier with a declared array type.
   *
   * @param name name of the array field
   * @param arrayType declared array type; may be {@code null}
   * @return a new expression carrying the given type
   */
  public static ArrayIdentifierExpression of(String name, ArrayType arrayType) {
    return new ArrayIdentifierExpression(name, arrayType);
  }

  /**
   * Returns the declared array type.
   *
   * @return the array type if one was specified, otherwise an empty Optional
   */
  public Optional<ArrayType> getArrayType() {
    if (arrayType == null) {
      return Optional.empty();
    }
    return Optional.of(arrayType);
  }

  /**
   * Accepts a SelectTypeExpressionVisitor, passing {@code this} so that the
   * ArrayIdentifierExpression overload of {@code visit} is selected.
   *
   * @param visitor the visitor to dispatch to
   * @return whatever the visitor returns
   */
  @Override
  public <T> T accept(final SelectTypeExpressionVisitor visitor) {
    return visitor.visit(this);
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
package org.hypertrace.core.documentstore.expression.impl;

import lombok.Getter;

/**
 * Array types that can be attached to an array identifier expression. Each constant carries the
 * name of the corresponding PostgreSQL array type, exposed through {@code getPostgresType()}.
 */
@Getter
public enum ArrayType {
  TEXT("text[]"),
  INTEGER("integer[]"),
  BOOLEAN("boolean[]"),
  DOUBLE_PRECISION("double precision[]");

  /** PostgreSQL type name for this array type, e.g. {@code text[]}. */
  private final String postgresType;

  ArrayType(final String postgresTypeName) {
    this.postgresType = postgresTypeName;
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import lombok.EqualsAndHashCode;
import lombok.Getter;
import org.hypertrace.core.documentstore.parser.FieldTransformationVisitor;
import org.hypertrace.core.documentstore.parser.SelectTypeExpressionVisitor;
import org.hypertrace.core.documentstore.postgres.utils.BasicPostgresSecurityValidator;

/**
Expand Down Expand Up @@ -68,6 +69,15 @@ public <T> T accept(final FieldTransformationVisitor<T> visitor) {
return visitor.visit(this);
}

/**
 * Accepts a SelectTypeExpressionVisitor and dispatches to the JsonIdentifierExpression-specific
 * visit method.
 *
 * @param visitor the visitor to dispatch to
 * @param <T> the result type produced by the visitor
 * @return the value returned by the visitor's {@code visit} call
 */
@Override
public <T> T accept(final SelectTypeExpressionVisitor visitor) {
return visitor.visit(this);
}

@Override
public String toString() {
return String.format(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,12 @@

import org.hypertrace.core.documentstore.expression.impl.AggregateExpression;
import org.hypertrace.core.documentstore.expression.impl.AliasedIdentifierExpression;
import org.hypertrace.core.documentstore.expression.impl.ArrayIdentifierExpression;
import org.hypertrace.core.documentstore.expression.impl.ConstantExpression;
import org.hypertrace.core.documentstore.expression.impl.ConstantExpression.DocumentConstantExpression;
import org.hypertrace.core.documentstore.expression.impl.FunctionExpression;
import org.hypertrace.core.documentstore.expression.impl.IdentifierExpression;
import org.hypertrace.core.documentstore.expression.impl.JsonIdentifierExpression;

public interface SelectTypeExpressionVisitor {
/** Visits an {@link AggregateExpression} and returns the visitor-defined result. */
<T> T visit(final AggregateExpression expression);
Expand All @@ -19,4 +21,20 @@ public interface SelectTypeExpressionVisitor {
/**
 * Visits an {@link IdentifierExpression} and returns the visitor-defined result. The default
 * overloads for ArrayIdentifierExpression and JsonIdentifierExpression in this interface
 * delegate to this method.
 */
<T> T visit(final IdentifierExpression expression);

/** Visits an {@link AliasedIdentifierExpression} and returns the visitor-defined result. */
<T> T visit(final AliasedIdentifierExpression expression);

/**
 * Visits an {@link ArrayIdentifierExpression}.
 *
 * <p>By default the expression is handled exactly like a plain {@link IdentifierExpression},
 * which is its supertype. Implementations that need array-specific handling override this
 * method.
 *
 * @param expression the array identifier being visited
 * @param <T> the result type produced by the visitor
 * @return the result of {@code visit(IdentifierExpression)} for the same expression
 */
default <T> T visit(final ArrayIdentifierExpression expression) {
  // Widen the static type so overload resolution selects visit(IdentifierExpression)
  // rather than this method again.
  final IdentifierExpression asIdentifier = expression;
  return visit(asIdentifier);
}

/**
 * Visits a {@link JsonIdentifierExpression}.
 *
 * <p>By default the expression is handled exactly like a plain {@link IdentifierExpression},
 * which is its supertype. Implementations that need JSON-specific handling override this
 * method.
 *
 * @param expression the JSON identifier being visited
 * @param <T> the result type produced by the visitor
 * @return the result of {@code visit(IdentifierExpression)} for the same expression
 */
default <T> T visit(final JsonIdentifierExpression expression) {
  // Widen the static type so overload resolution selects visit(IdentifierExpression)
  // rather than this method again.
  final IdentifierExpression asIdentifier = expression;
  return visit(asIdentifier);
}
}
Loading
Loading