Skip to content

Commit 015ee5c

Browse files
authored
feat: add support for self-joins with sub-query (#220)
* feat: add support for self-joins * chore: address review comments * refactor: api changes for join expression * style: spotless formatting * feat: implementation for sub query joins within the same collection * chore: complete the impl for sub query self join * test: add integration test for self join with sub-query * chore: cleaned up code and added docs * chore: address review comments * chore: address review comments p2 * test: add integration test to verify self join with sub-query works for nested fields as well * chore: update collection data for integration test * refactor: move inner class in MongoLetClauseBuilder below the outer class methods
1 parent 577aaac commit 015ee5c

29 files changed

+904
-129
lines changed

document-store/src/integrationTest/java/org/hypertrace/core/documentstore/DocStoreQueryV1Test.java

Lines changed: 179 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,13 +80,16 @@
8080
import java.util.stream.StreamSupport;
8181
import org.hypertrace.core.documentstore.commons.DocStoreConstants;
8282
import org.hypertrace.core.documentstore.expression.impl.AggregateExpression;
83+
import org.hypertrace.core.documentstore.expression.impl.AliasedIdentifierExpression;
8384
import org.hypertrace.core.documentstore.expression.impl.ConstantExpression;
8485
import org.hypertrace.core.documentstore.expression.impl.FunctionExpression;
8586
import org.hypertrace.core.documentstore.expression.impl.IdentifierExpression;
8687
import org.hypertrace.core.documentstore.expression.impl.KeyExpression;
8788
import org.hypertrace.core.documentstore.expression.impl.LogicalExpression;
8889
import org.hypertrace.core.documentstore.expression.impl.RelationalExpression;
90+
import org.hypertrace.core.documentstore.expression.impl.SubQueryJoinExpression;
8991
import org.hypertrace.core.documentstore.expression.impl.UnnestExpression;
92+
import org.hypertrace.core.documentstore.expression.operators.AggregationOperator;
9093
import org.hypertrace.core.documentstore.expression.operators.FunctionOperator;
9194
import org.hypertrace.core.documentstore.expression.operators.RelationalOperator;
9295
import org.hypertrace.core.documentstore.expression.type.FilterTypeExpression;
@@ -3478,6 +3481,182 @@ public void testToLowerCaseMongoFunctionOperator(String dataStoreName) throws Ex
34783481
dataStoreName, resultDocs, "query/case_insensitive_exact_match_response.json", 2);
34793482
}
34803483

3484+
@ParameterizedTest
3485+
@ArgumentsSource(MongoProvider.class)
3486+
void testSelfJoinWithSubQuery(String dataStoreName) throws IOException {
3487+
Collection collection = getCollection(dataStoreName);
3488+
3489+
/*
3490+
This is the query we want to execute:
3491+
SELECT item, quantity, date
3492+
FROM <implicit_collection>
3493+
JOIN (
3494+
SELECT item, MAX(date) AS latest_date
3495+
FROM <implicit_collection>
3496+
GROUP BY item
3497+
) latest
3498+
ON item = latest.item
3499+
AND date = latest.latest_date
3500+
ORDER BY `item` ASC;
3501+
*/
3502+
3503+
/*
3504+
The right subquery:
3505+
SELECT item, MAX(date) AS latest_date
3506+
FROM <implicit_collection>
3507+
GROUP BY item
3508+
*/
3509+
Query subQuery =
3510+
Query.builder()
3511+
.addSelection(SelectionSpec.of(IdentifierExpression.of("item")))
3512+
.addSelection(
3513+
SelectionSpec.of(
3514+
AggregateExpression.of(
3515+
AggregationOperator.MAX, IdentifierExpression.of("date")),
3516+
"latest_date"))
3517+
.addAggregation(IdentifierExpression.of("item"))
3518+
.build();
3519+
3520+
/*
3521+
The FROM expression representing a join with the right subquery:
3522+
FROM <implicit_collection>
3523+
JOIN (
3524+
SELECT item, MAX(date) AS latest_date
3525+
FROM <implicit_collection>
3526+
GROUP BY item
3527+
) latest
3528+
ON item = latest.item
3529+
AND date = latest.latest_date;
3530+
*/
3531+
SubQueryJoinExpression subQueryJoinExpression =
3532+
SubQueryJoinExpression.builder()
3533+
.subQuery(subQuery)
3534+
.subQueryAlias("latest")
3535+
.joinCondition(
3536+
LogicalExpression.and(
3537+
RelationalExpression.of(
3538+
IdentifierExpression.of("item"),
3539+
RelationalOperator.EQ,
3540+
AliasedIdentifierExpression.builder()
3541+
.name("item")
3542+
.contextAlias("latest")
3543+
.build()),
3544+
RelationalExpression.of(
3545+
IdentifierExpression.of("date"),
3546+
RelationalOperator.EQ,
3547+
AliasedIdentifierExpression.builder()
3548+
.name("latest_date")
3549+
.contextAlias("latest")
3550+
.build())))
3551+
.build();
3552+
3553+
/*
3554+
Now build the top-level Query:
3555+
SELECT item, quantity, date FROM <subQueryJoinExpression> ORDER BY `item` ASC;
3556+
*/
3557+
Query mainQuery =
3558+
Query.builder()
3559+
.addSelection(IdentifierExpression.of("item"))
3560+
.addSelection(IdentifierExpression.of("quantity"))
3561+
.addSelection(IdentifierExpression.of("date"))
3562+
.addFromClause(subQueryJoinExpression)
3563+
.addSort(IdentifierExpression.of("item"), ASC)
3564+
.build();
3565+
3566+
Iterator<Document> iterator = collection.aggregate(mainQuery);
3567+
assertDocsAndSizeEqual(
3568+
dataStoreName, iterator, "query/self_join_with_sub_query_response.json", 4);
3569+
}
3570+
3571+
@ParameterizedTest
3572+
@ArgumentsSource(MongoProvider.class)
3573+
void testSelfJoinWithSubQueryWithNestedFields(String dataStoreName) throws IOException {
3574+
createCollectionData(
3575+
"query/items_data_with_nested_fields.json", "items_data_with_nested_fields");
3576+
Collection collection = getCollection(dataStoreName, "items_data_with_nested_fields");
3577+
3578+
/*
3579+
This is the query we want to execute:
3580+
SELECT itemDetails.item, itemDetails.quantity, itemDetails.date
3581+
FROM <implicit_collection>
3582+
JOIN (
3583+
SELECT itemDetails.item, MAX(itemDetails.date) AS latest_date
3584+
FROM <implicit_collection>
3585+
GROUP BY itemDetails.item
3586+
) latest
3587+
ON itemDetails.item = latest.itemDetails.item
3588+
AND itemDetails.date = latest.latest_date
3589+
ORDER BY `itemDetails.item` ASC;
3590+
*/
3591+
3592+
/*
3593+
The right subquery:
3594+
SELECT itemDetails.item, MAX(itemDetails.date) AS latest_date
3595+
FROM <implicit_collection>
3596+
GROUP BY itemDetails.item
3597+
*/
3598+
Query subQuery =
3599+
Query.builder()
3600+
.addSelection(SelectionSpec.of(IdentifierExpression.of("itemDetails.item")))
3601+
.addSelection(
3602+
SelectionSpec.of(
3603+
AggregateExpression.of(
3604+
AggregationOperator.MAX, IdentifierExpression.of("itemDetails.date")),
3605+
"latest_date"))
3606+
.addAggregation(IdentifierExpression.of("itemDetails.item"))
3607+
.build();
3608+
3609+
/*
3610+
The FROM expression representing a join with the right subquery:
3611+
FROM <implicit_collection>
3612+
JOIN (
3613+
SELECT itemDetails.item, MAX(itemDetails.date) AS latest_date
3614+
FROM <implicit_collection>
3615+
GROUP BY itemDetails.item
3616+
) latest
3617+
ON itemDetails.item = latest.itemDetails.item
3618+
AND itemDetails.date = latest.latest_date;
3619+
*/
3620+
SubQueryJoinExpression subQueryJoinExpression =
3621+
SubQueryJoinExpression.builder()
3622+
.subQuery(subQuery)
3623+
.subQueryAlias("latest")
3624+
.joinCondition(
3625+
LogicalExpression.and(
3626+
RelationalExpression.of(
3627+
IdentifierExpression.of("itemDetails.item"),
3628+
RelationalOperator.EQ,
3629+
AliasedIdentifierExpression.builder()
3630+
.name("itemDetails.item")
3631+
.contextAlias("latest")
3632+
.build()),
3633+
RelationalExpression.of(
3634+
IdentifierExpression.of("itemDetails.date"),
3635+
RelationalOperator.EQ,
3636+
AliasedIdentifierExpression.builder()
3637+
.name("latest_date")
3638+
.contextAlias("latest")
3639+
.build())))
3640+
.build();
3641+
3642+
/*
3643+
Now build the top-level Query:
3644+
SELECT itemDetails.item, itemDetails.quantity, itemDetails.date FROM <subQueryJoinExpression> ORDER BY `itemDetails.item` ASC;
3645+
*/
3646+
Query mainQuery =
3647+
Query.builder()
3648+
.addSelection(IdentifierExpression.of("itemDetails.item"))
3649+
.addSelection(IdentifierExpression.of("itemDetails.quantity"))
3650+
.addSelection(IdentifierExpression.of("itemDetails.date"))
3651+
.addFromClause(subQueryJoinExpression)
3652+
.addSort(IdentifierExpression.of("itemDetails.item"), ASC)
3653+
.build();
3654+
3655+
Iterator<Document> iterator = collection.aggregate(mainQuery);
3656+
assertDocsAndSizeEqual(
3657+
dataStoreName, iterator, "query/sub_query_join_response_with_nested_fields.json", 3);
3658+
}
3659+
34813660
private static Collection getCollection(final String dataStoreName) {
34823661
return getCollection(dataStoreName, COLLECTION_NAME);
34833662
}
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
[
2+
{
3+
"_id": 1,
4+
"itemDetails": {
5+
"item": "Comb",
6+
"date": "2012-01-01",
7+
"quantity": 10
8+
}
9+
},
10+
{
11+
"_id": 2,
12+
"itemDetails": {
13+
"item": "Shampoo",
14+
"date": "2012-01-01",
15+
"quantity": 10
16+
}
17+
},
18+
{
19+
"_id": 3,
20+
"itemDetails": {
21+
"item": "Shampoo",
22+
"date": "2012-02-02",
23+
"quantity": 20
24+
}
25+
},
26+
{
27+
"_id": 4,
28+
"itemDetails": {
29+
"item": "Shampoo",
30+
"date": "2012-03-03",
31+
"quantity": 30
32+
}
33+
},
34+
{
35+
"_id": 5,
36+
"itemDetails": {
37+
"item": "Soap",
38+
"date": "2012-02-02",
39+
"quantity": 20
40+
}
41+
},
42+
{
43+
"_id": 6,
44+
"itemDetails": {
45+
"item": "Soap",
46+
"date": "2012-01-01",
47+
"quantity": 10
48+
}
49+
}
50+
]
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
[
2+
{
3+
"date": "2015-09-10T08:43:00Z",
4+
"item": "Comb",
5+
"quantity": 10
6+
},
7+
{
8+
"date": "2014-03-01T09:00:00Z",
9+
"item": "Mirror",
10+
"quantity": 1
11+
},
12+
{
13+
"date": "2014-04-04T11:21:39.736Z",
14+
"item": "Shampoo",
15+
"quantity": 20
16+
},
17+
{
18+
"date": "2016-02-06T20:20:13Z",
19+
"item": "Soap",
20+
"quantity": 5
21+
}
22+
]
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
[
2+
{
3+
"itemDetails": {
4+
"item": "Comb",
5+
"date": "2012-01-01",
6+
"quantity": 10
7+
}
8+
},
9+
{
10+
"itemDetails": {
11+
"item": "Shampoo",
12+
"date": "2012-03-03",
13+
"quantity": 30
14+
}
15+
},
16+
{
17+
"itemDetails": {
18+
"item": "Soap",
19+
"date": "2012-02-02",
20+
"quantity": 20
21+
}
22+
}
23+
]
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
package org.hypertrace.core.documentstore.expression.impl;
2+
3+
import com.google.common.base.Preconditions;
import lombok.EqualsAndHashCode;
import lombok.Value;
import org.hypertrace.core.documentstore.parser.SelectTypeExpressionVisitor;
6+
7+
/**
8+
* Expression for referencing an identifier/column name within a context having an alias.
9+
*
10+
* <p>Example: In this query: <code>
11+
* SELECT item, quantity, date
12+
* FROM <implicit_collection>
13+
* JOIN (
14+
* SELECT item, MAX(date) AS latest_date
15+
* FROM <implicit_collection>
16+
* GROUP BY item
17+
* ) AS latest
18+
* ON item = latest.item
19+
* ORDER BY `item` ASC;
20+
* </code> the rhs of the join condition "latest.item" can be expressed as: <code>
21+
* AliasedIdentifierExpression.builder().name("item").alias("alias1").build() </code>
22+
*/
23+
@Value
24+
public class AliasedIdentifierExpression extends IdentifierExpression {
25+
String contextAlias;
26+
27+
private AliasedIdentifierExpression(final String name, final String contextAlias) {
28+
super(name);
29+
this.contextAlias = contextAlias;
30+
}
31+
32+
@Override
33+
public <T> T accept(final SelectTypeExpressionVisitor visitor) {
34+
return visitor.visit(this);
35+
}
36+
37+
@Override
38+
public String toString() {
39+
return "`" + getContextAlias() + "." + getName() + "`";
40+
}
41+
42+
public static AliasedIdentifierExpressionBuilder builder() {
43+
return new AliasedIdentifierExpressionBuilder();
44+
}
45+
46+
public static class AliasedIdentifierExpressionBuilder {
47+
private String name;
48+
private String contextAlias;
49+
50+
public AliasedIdentifierExpressionBuilder name(final String name) {
51+
this.name = name;
52+
return this;
53+
}
54+
55+
public AliasedIdentifierExpressionBuilder contextAlias(final String contextAlias) {
56+
this.contextAlias = contextAlias;
57+
return this;
58+
}
59+
60+
public AliasedIdentifierExpression build() {
61+
Preconditions.checkArgument(
62+
this.name != null && !this.name.isBlank(), "name is null or blank");
63+
Preconditions.checkArgument(
64+
this.contextAlias != null && !this.contextAlias.isBlank(),
65+
"contextAlias is null or blank");
66+
return new AliasedIdentifierExpression(this.name, this.contextAlias);
67+
}
68+
}
69+
}

document-store/src/main/java/org/hypertrace/core/documentstore/expression/impl/IdentifierExpression.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import lombok.AccessLevel;
55
import lombok.AllArgsConstructor;
66
import lombok.Value;
7+
import lombok.experimental.NonFinal;
78
import org.hypertrace.core.documentstore.expression.type.GroupTypeExpression;
89
import org.hypertrace.core.documentstore.expression.type.SelectTypeExpression;
910
import org.hypertrace.core.documentstore.expression.type.SortTypeExpression;
@@ -17,7 +18,8 @@
1718
* <p>Example: IdentifierExpression.of("col1");
1819
*/
1920
@Value
20-
@AllArgsConstructor(access = AccessLevel.PRIVATE)
21+
@NonFinal
22+
@AllArgsConstructor(access = AccessLevel.PROTECTED)
2123
public class IdentifierExpression
2224
implements GroupTypeExpression, SelectTypeExpression, SortTypeExpression {
2325

0 commit comments

Comments
 (0)