Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
44 commits
Select commit Hold shift + click to select a range
674d2ee
WIP
suddendust Sep 22, 2025
f6cb67a
Revert "WIP"
suddendust Sep 22, 2025
6e7795d
Reapply "WIP"
suddendust Sep 22, 2025
957a897
Revert "Reapply "WIP""
suddendust Sep 22, 2025
502c373
Reapply "Reapply "WIP""
suddendust Sep 22, 2025
0544358
WIP
suddendust Sep 22, 2025
82dc37f
WIP
suddendust Sep 22, 2025
c0ed72a
Add default impl for `getDocumentType`
suddendust Sep 22, 2025
e9f25da
WIP
suddendust Sep 22, 2025
e6be8b9
Merge branch 'pg_subdocument' of github.com:suddendust/document-store…
suddendust Sep 22, 2025
ee7678f
Rollback unnecessary changes
suddendust Sep 22, 2025
63b0dec
Added default DocumentType in Document.java
suddendust Sep 22, 2025
7f91cf0
Address comments
suddendust Sep 23, 2025
7376b56
Merge branch 'main' into pg_subdocument
suddendust Sep 24, 2025
ea2aefd
Address comments
suddendust Sep 24, 2025
e3ccb5b
Merge remote-tracking branch 'myfork/pg_subdocument' into pg_subdocument
suddendust Sep 24, 2025
3e3e6cf
Address docs
suddendust Sep 24, 2025
e6a6f7e
Adds more test cases in JSONDocumentTest.java
suddendust Sep 24, 2025
483eb16
Spotless
suddendust Sep 24, 2025
e5926ea
Added more test cases
suddendust Sep 24, 2025
b261463
Spotless
suddendust Sep 24, 2025
7cf8e17
Add test case for PostgresResultIteratorWithBasicTypes
suddendust Sep 24, 2025
9c474db
Merge branch 'main' into pg_subdocument
suddendust Sep 24, 2025
cb2ed18
WIP
suddendust Sep 24, 2025
06fdc8f
Merge branch 'pg_subdocument' into poc
suddendust Sep 24, 2025
fbce388
WIP
suddendust Sep 24, 2025
50b09b7
Merge branch 'main' of github.com:hypertrace/document-store into refa…
suddendust Sep 24, 2025
0f8df71
WIP
suddendust Sep 24, 2025
7f7defe
WIP
suddendust Sep 24, 2025
13e591e
Added test cases
suddendust Sep 26, 2025
e1074b9
Added 1 more integration test case to ensure result consistency b/w f…
suddendust Sep 26, 2025
77e1473
Added test cases for unnest
suddendust Sep 26, 2025
70763bb
Merge branch 'main' into refactor
suddendust Oct 2, 2025
7f76f74
Address comments except integration test
suddendust Oct 2, 2025
40946ad
Revert "Address comments except integration test"
suddendust Oct 2, 2025
41a405f
Merge branch 'refactor' of github.com:suddendust/document-store into …
suddendust Oct 2, 2025
1962554
Reapply "Address comments except integration test"
suddendust Oct 2, 2025
1903a4a
WIP
suddendust Oct 2, 2025
c3d0a32
Test cases working
suddendust Oct 2, 2025
0fa2a62
WIP
suddendust Oct 2, 2025
b6a4cf6
WIP
suddendust Oct 2, 2025
d2fef95
Add more test cases
suddendust Oct 2, 2025
de09a90
Add more test cases
suddendust Oct 2, 2025
4f5e40d
WIP
suddendust Oct 2, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{
"statements": [
"INSERT INTO \"myTestFlat\" (\n\"_id\", \"item\", \"price\", \"quantity\", \"date\", \"tags\", \"props\", \"sales\"\n) VALUES (\n1, 'Soap', 10, 2, '2014-03-01T08:00:00Z',\n'{\"hygiene\", \"personal-care\", \"premium\"}',\n'{\"colors\": [\"Blue\", \"Green\"], \"brand\": \"Dettol\", \"size\": \"M\", \"seller\": {\"name\": \"Metro Chemicals Pvt. Ltd.\", \"address\": {\"city\": \"Mumbai\", \"pincode\": 400004}}}',\nNULL\n)",
"INSERT INTO \"myTestFlat\" (\n\"_id\", \"item\", \"price\", \"quantity\", \"date\", \"tags\", \"props\", \"sales\"\n) VALUES (\n2, 'Mirror', 20, 1, '2014-03-01T09:00:00Z',\n'{\"home-decor\", \"reflective\", \"glass\"}',\nNULL,\nNULL\n)",
"INSERT INTO \"myTestFlat\" (\n\"_id\", \"item\", \"price\", \"quantity\", \"date\", \"tags\", \"props\", \"sales\"\n) VALUES (\n3, 'Shampoo', 5, 10, '2014-03-15T09:00:00Z',\n'{\"hair-care\", \"personal-care\", \"premium\", \"herbal\"}',\n'{\"colors\": [\"Black\"], \"brand\": \"Sunsilk\", \"size\": \"L\", \"seller\": {\"name\": \"Metro Chemicals Pvt. Ltd.\", \"address\": {\"city\": \"Mumbai\", \"pincode\": 400004}}}',\nNULL\n)",
"INSERT INTO \"myTestFlat\" (\n\"_id\", \"item\", \"price\", \"quantity\", \"date\", \"tags\", \"props\", \"sales\"\n) VALUES (\n4, 'Shampoo', 5, 20, '2014-04-04T11:21:39.736Z',\n'{\"hair-care\", \"budget\", \"bulk\"}',\nNULL,\nNULL\n)",
"INSERT INTO \"myTestFlat\" (\n\"_id\", \"item\", \"price\", \"quantity\", \"date\", \"tags\", \"props\", \"sales\"\n) VALUES (\n5, 'Soap', 20, 5, '2014-04-04T21:23:13.331Z',\n'{\"hygiene\", \"antibacterial\", \"family-pack\"}',\n'{\"colors\": [\"Orange\", \"Blue\"], \"brand\": \"Lifebuoy\", \"size\": \"S\", \"seller\": {\"name\": \"Hans and Co.\", \"address\": {\"city\": \"Kolkata\", \"pincode\": 700007}}}',\nNULL\n)",
"INSERT INTO \"myTestFlat\" (\n\"_id\", \"item\", \"price\", \"quantity\", \"date\", \"tags\", \"props\", \"sales\"\n) VALUES (\n6, 'Comb', 7.5, 5, '2015-06-04T05:08:13Z',\n'{\"grooming\", \"plastic\", \"essential\"}',\nNULL,\nNULL\n)",
"INSERT INTO \"myTestFlat\" (\n\"_id\", \"item\", \"price\", \"quantity\", \"date\", \"tags\", \"props\", \"sales\"\n) VALUES (\n7, 'Comb', 7.5, 10, '2015-09-10T08:43:00Z',\n'{\"grooming\", \"bulk\", \"wholesale\"}',\n'{\"colors\": [], \"seller\": {\"name\": \"Go Go Plastics\", \"address\": {\"city\": \"Kolkata\", \"pincode\": 700007}}}',\nNULL\n)",
"INSERT INTO \"myTestFlat\" (\n\"_id\", \"item\", \"price\", \"quantity\", \"date\", \"tags\", \"props\", \"sales\"\n) VALUES (\n8, 'Soap', 10, 5, '2016-02-06T20:20:13Z',\n'{\"hygiene\", \"budget\", \"basic\"}',\nNULL,\nNULL\n)"
]
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import org.hypertrace.core.documentstore.metric.DocStoreMetricProvider;

public interface Datastore {

Set<String> listCollections();

boolean createCollection(String collectionName, Map<String, String> options);
Expand All @@ -19,4 +20,19 @@ public interface Datastore {
DocStoreMetricProvider getDocStoreMetricProvider();

void close();

/**
* Returns a collection with the given name and type. A type can be used to specify different
* storage modes of the collection. For example, a collection can have all top-level fields or a
* single JSON column that contains all the fields. Both collections are handled differently in
* this case.
*
* @param collectionName name of the collection
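* @param documentType type of the collection. For PG, we support FLAT and NESTED (the legacy
* storage mode, kept for backward compatibility)
* @return the corresponding collection impl
* @throws UnsupportedOperationException always, in this default implementation; datastores that
* support typed collections override this method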
*/
default Collection getCollectionForType(String collectionName, DocumentType documentType) {

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you please add comment on other methods as well.
for eg: what would be the behavior of getCollection(), createCollection()

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I do not see a corresponding implementation change in MongoDatastore.
If we upgrade document-store in entity-service, is the expectation that we would getCollection for mongodb collection and getCollectionForType for postgres collection?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

+1
How is this supposed to be used by the clients/consumers?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

.is the expectation that we would getCollection for mongodb collection and getCollectionForType for postgres collection?

That's the expectation right now, yes. Now that I think of it, a better approach for Mongo would perhaps be to return an instance of MongoCollection for type NESTED and throw an IllegalArgumentException otherwise.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@suresh-prakash This API is intended to be primarily used for SQL datastores like PG in which we can store both flat and nested documents. For Mongo, since we only support nested, it'll throw an IllegalArgumentException for FLAT doc types (as per this PR).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe, I am a bit confused here still. The consumers are the ones who are choosing which of the Postgres FLAT/NESTED to use. So, why would the clients again ask the library which one is being used. I mean, the clients would already have this information with them, right?

throw new UnsupportedOperationException("Unsupported collection type: " + documentType);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import org.bson.Document;
import org.hypertrace.core.documentstore.Collection;
import org.hypertrace.core.documentstore.Datastore;
import org.hypertrace.core.documentstore.DocumentType;
import org.hypertrace.core.documentstore.metric.DocStoreMetricProvider;
import org.hypertrace.core.documentstore.metric.mongo.MongoDocStoreMetricProvider;
import org.hypertrace.core.documentstore.model.config.ConnectionConfig;
Expand All @@ -25,6 +26,7 @@

@Slf4j
public class MongoDatastore implements Datastore {

private static final Logger LOGGER = LoggerFactory.getLogger(MongoDatastore.class);

private final ConnectionConfig connectionConfig;
Expand Down Expand Up @@ -78,6 +80,16 @@ public boolean deleteCollection(String collectionName) {
return true;
}

/**
 * Resolves a collection for the requested document type.
 *
 * <p>Only {@link DocumentType#NESTED} is accepted here; that case delegates to
 * {@link #getCollection(String)}. Any other value (including {@code null}) is rejected.
 *
 * @param collectionName name of the collection to look up
 * @param documentType requested storage type of the collection
 * @return the collection returned by {@link #getCollection(String)}
 * @throws IllegalArgumentException if {@code documentType} is anything other than NESTED
 */
@Override
public Collection getCollectionForType(String collectionName, DocumentType documentType) {
  // Guard clause: reject every type except NESTED before touching the datastore.
  if (documentType != DocumentType.NESTED) {
    final String reason = "Unsupported document type: " + documentType + " for Mongo collection";
    throw new IllegalArgumentException(reason);
  }
  return getCollection(collectionName);
}

@Override
public Collection getCollection(String collectionName) {
return new MongoCollection(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
package org.hypertrace.core.documentstore.postgres;

import org.hypertrace.core.documentstore.CloseableIterator;
import org.hypertrace.core.documentstore.Document;
import org.hypertrace.core.documentstore.model.options.QueryOptions;
import org.hypertrace.core.documentstore.postgres.query.v1.PostgresQueryParser;
import org.hypertrace.core.documentstore.postgres.query.v1.transformer.FlatPostgresFieldTransformer;
import org.hypertrace.core.documentstore.query.Query;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * PostgreSQL collection implementation for flat documents. All fields are stored as top-level
 * PostgreSQL columns.
 *
 * <p>All read paths ({@code query}, {@code find} and {@code count}) build their parser through
 * {@link #createParser(Query)}, so each of them sees the same transformed query and the same
 * {@link FlatPostgresFieldTransformer}.
 */
// todo: Throw unsupported op exception for all write methods
public class FlatPostgresCollection extends PostgresCollection {

  private static final Logger LOGGER = LoggerFactory.getLogger(FlatPostgresCollection.class);

  /**
   * Creates a flat collection bound to the given client and collection name.
   *
   * @param client Postgres client handed to the parent collection
   * @param collectionName name of the collection handed to the parent collection
   */
  FlatPostgresCollection(final PostgresClient client, final String collectionName) {
    super(client, collectionName);
  }

  /**
   * Runs the query using a parser configured for flat collections.
   *
   * @param query the query to execute
   * @param queryOptions not consulted by this implementation
   * @return an iterator over the matching documents, as produced by {@code queryWithParser}
   */
  @Override
  public CloseableIterator<Document> query(final Query query, final QueryOptions queryOptions) {
    PostgresQueryParser queryParser = createParser(query);
    return queryWithParser(query, queryParser);
  }

  /**
   * Runs the query using a parser configured for flat collections.
   *
   * @param query the query to execute
   * @return an iterator over the matching documents, as produced by {@code queryWithParser}
   */
  @Override
  public CloseableIterator<Document> find(final Query query) {
    PostgresQueryParser queryParser = createParser(query);
    return queryWithParser(query, queryParser);
  }

  /**
   * Counts the results of the query using a parser configured for flat collections.
   *
   * <p>The parser is built via {@link #createParser(Query)} so that counting applies the same
   * query transformation as {@code query} and {@code find}; previously this method parsed the
   * untransformed query.
   *
   * @param query the query whose results are counted
   * @param queryOptions not consulted by this implementation
   * @return the count computed by {@code countWithParser}
   */
  @Override
  public long count(Query query, QueryOptions queryOptions) {
    PostgresQueryParser queryParser = createParser(query);
    return countWithParser(query, queryParser);
  }

  /**
   * Builds the parser shared by every read path of this collection.
   *
   * @param query the query as supplied by the caller
   * @return a parser over this collection's table identifier, the result of
   *     {@code PostgresQueryExecutor.transformAndLog(query)} and a flat field transformer
   */
  private PostgresQueryParser createParser(Query query) {
    return new PostgresQueryParser(
        tableIdentifier,
        PostgresQueryExecutor.transformAndLog(query),
        new FlatPostgresFieldTransformer());
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
package org.hypertrace.core.documentstore.postgres;

import org.hypertrace.core.documentstore.CloseableIterator;
import org.hypertrace.core.documentstore.Document;
import org.hypertrace.core.documentstore.model.options.QueryOptions;
import org.hypertrace.core.documentstore.postgres.query.v1.PostgresQueryParser;
import org.hypertrace.core.documentstore.postgres.query.v1.transformer.NestedPostgresColTransformer;
import org.hypertrace.core.documentstore.query.Query;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Legacy-mode PostgreSQL collection: document fields are stored within JSONB columns rather than
 * as individual top-level columns.
 *
 * <p>Each read operation parses the query with a {@link NestedPostgresColTransformer}.
 */
public class NestedPostgresCollection extends PostgresCollection {

  private static final Logger LOGGER = LoggerFactory.getLogger(NestedPostgresCollection.class);

  /**
   * Creates a nested collection bound to the given client and collection name.
   *
   * @param client Postgres client handed to the parent collection
   * @param collectionName name of the collection handed to the parent collection
   */
  NestedPostgresCollection(final PostgresClient client, final String collectionName) {
    super(client, collectionName);
  }

  /**
   * Executes the query with a nested-column parser.
   *
   * @param query the query to execute
   * @param queryOptions not consulted by this implementation
   * @return an iterator over the matching documents, as produced by {@code queryWithParser}
   */
  @Override
  public CloseableIterator<Document> query(final Query query, final QueryOptions queryOptions) {
    return queryWithParser(query, nestedParserFor(query));
  }

  /**
   * Executes the query with a nested-column parser.
   *
   * @param query the query to execute
   * @return an iterator over the matching documents, as produced by {@code queryWithParser}
   */
  @Override
  public CloseableIterator<Document> find(final Query query) {
    return queryWithParser(query, nestedParserFor(query));
  }

  /**
   * Counts the results of the query with a nested-column parser.
   *
   * @param query the query whose results are counted
   * @param queryOptions not consulted by this implementation
   * @return the count computed by {@code countWithParser}
   */
  @Override
  public long count(Query query, QueryOptions queryOptions) {
    return countWithParser(query, nestedParserFor(query));
  }

  /**
   * Builds the parser used by every read operation of this collection.
   *
   * @param query the query as supplied by the caller
   * @return a parser over this collection's table identifier, the result of
   *     {@code PostgresQueryExecutor.transformAndLog(query)} and a nested column transformer
   */
  private PostgresQueryParser nestedParserFor(Query query) {
    final Query transformed = PostgresQueryExecutor.transformAndLog(query);
    final NestedPostgresColTransformer columnTransformer = new NestedPostgresColTransformer();
    return new PostgresQueryParser(tableIdentifier, transformed, columnTransformer);
  }
}
Loading
Loading