Skip to content

Commit e2f2668

Browse files
committed
Update find and Rerank, start reworking the tests
1 parent cd00bbb commit e2f2668

File tree

105 files changed

+2090
-1273
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

105 files changed

+2090
-1273
lines changed

TEST.MD

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
2+
## Run against Local HCD Instance
3+
(_To run locally, you must have the Data API running locally on top of HCD or DSE._)
4+
5+
```bash
6+
mvn clean test -Ptest_local
7+
```
8+
9+
> **Why does it work?**
10+
>
11+
> - In `pom.xml` the environment variable `ASTRA_DB_JAVA_TEST_ENV` is set to `local`
12+
> - Tests are annotated with
13+
>
14+
> ```java
15+
> @EnabledIfSystemProperty(named = "ASTRA_DB_JAVA_TEST_ENV", matches = "local")
16+
> @DisabledIfSystemProperty(named = "ASTRA_DB_JAVA_TEST_ENV", matches = "(?!local).*")
17+
> public class MyTest {
18+
> }
19+
> ```
20+
21+
- To run from an IDE, make sure to set the environment variable `ASTRA_DB_JAVA_TEST_ENV` to `local` in your Run configuration.
22+
23+
24+
## Run against Astra DEV
25+
26+
- `ASTRA_DB_APPLICATION_TOKEN_DEV` must be defined in your environment variables; it is deliberately left out of the POM files for security reasons.
27+
28+
- To run with Maven (AWS, `eu-west-2`)
29+
30+
```bash
31+
mvn clean test -Ptest_astra_dev
32+
```
33+
34+
- To run with Maven on one particular region
35+
36+
```bash
37+
mvn clean test -Ptest_astra_dev \
38+
-DASTRA_CLOUD_PROVIDER_DEV=GCP \
39+
-DASTRA_CLOUD_REGION_DEV=us-central-1
40+
```
41+
42+
- To run while also overriding the token
43+
44+
```bash
45+
mvn clean test \
46+
-Ptest_astra_dev \
47+
-DASTRA_CLOUD_PROVIDER_DEV=GCP \
48+
-DASTRA_CLOUD_REGION_DEV=us-central-1 \
49+
-DASTRA_DB_APPLICATION_TOKEN_DEV=AstraCS:... \
50+
-Dtest="com.datastax.astra.test.integration.*.*Test"
51+
```
52+

astra-db-java-tools/src/test/java/com/datastax/astra/samples/CsvPhilosophers.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ public static void main(String[] args) throws Exception {
3030
CsvLoader.load(csvFilename, collection, new CsvRowMapper() {
3131
@Override
3232
public Document map(Document csvRow) {
33+
3334
// Tags should be an Array
3435
csvRow.vectorize(csvRow.getString("quote"));
3536
csvRow.append("tags",csvRow.getString("tags").split(";"));

astra-db-java/pom.xml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,15 @@
8686
<artifactId>awaitility</artifactId>
8787
<scope>test</scope>
8888
</dependency>
89+
90+
<!-- Embedding model in memory for testing -->
91+
<dependency>
92+
<groupId>dev.langchain4j</groupId>
93+
<artifactId>langchain4j-embeddings-all-minilm-l6-v2</artifactId>
94+
<version>1.0.0-beta2</version>
95+
<scope>test</scope>
96+
</dependency>
97+
8998
</dependencies>
9099

91100
<build>

astra-db-java/src/main/java/com/datastax/astra/client/admin/AstraDBDatabaseAdmin.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import com.datastax.astra.client.core.options.DataAPIClientOptions;
2626
import com.datastax.astra.client.databases.commands.results.FindEmbeddingProvidersResult;
2727
import com.datastax.astra.client.databases.DatabaseOptions;
28+
import com.datastax.astra.client.databases.commands.results.FindRerankingProvidersResult;
2829
import com.datastax.astra.internal.api.AstraApiEndpoint;
2930
import com.datastax.astra.internal.command.AbstractCommandRunner;
3031
import com.datastax.astra.internal.utils.Assert;
@@ -192,6 +193,14 @@ public FindEmbeddingProvidersResult findEmbeddingProviders() {
192193
return new FindEmbeddingProvidersResult(admin.findEmbeddingProviders().getEmbeddingProviders());
193194
}
194195

196+
/**
 * {@inheritDoc}
 *
 * <p>Delegates to a {@link DataAPIDatabaseAdmin} built from this admin's database and options,
 * then re-wraps the providers it returns in a new {@link FindRerankingProvidersResult}.</p>
 */
197+
@Override
198+
public FindRerankingProvidersResult findRerankingProviders() {
199+
log.debug("findRerankingProviders");
200+
// The Data API admin is the component that actually issues the command.
DataAPIDatabaseAdmin admin = new DataAPIDatabaseAdmin(db, this.options);
201+
return new FindRerankingProvidersResult(admin.findRerankingProviders().getRerankingProviders());
202+
}
203+
195204
/** {@inheritDoc} */
196205
@Override
197206
public void createKeyspace(String keyspace, boolean updateDBKeyspace) {

astra-db-java/src/main/java/com/datastax/astra/client/admin/DataAPIDatabaseAdmin.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,11 @@
2323
import com.datastax.astra.client.core.options.BaseOptions;
2424
import com.datastax.astra.client.core.commands.Command;
2525
import com.datastax.astra.client.core.commands.CommandType;
26+
import com.datastax.astra.client.core.rerank.RerankProvider;
2627
import com.datastax.astra.client.databases.commands.results.FindEmbeddingProvidersResult;
2728
import com.datastax.astra.client.core.vectorize.EmbeddingProvider;
2829
import com.datastax.astra.client.databases.Database;
30+
import com.datastax.astra.client.databases.commands.results.FindRerankingProvidersResult;
2931
import com.datastax.astra.client.databases.definition.keyspaces.KeyspaceOptions;
3032
import com.datastax.astra.internal.api.DataAPIResponse;
3133
import com.datastax.astra.internal.command.AbstractCommandRunner;
@@ -109,6 +111,14 @@ public FindEmbeddingProvidersResult findEmbeddingProviders() {
109111
EmbeddingProvider.class));
110112
}
111113

114+
/**
 * {@inheritDoc}
 *
 * <p>Runs the {@code findRerankingProviders} command and builds the result from the
 * {@code rerankingProviders} status entry of the response, read through
 * {@code getStatusKeyAsMap} with {@link RerankProvider} as the target class.</p>
 */
@Override
115+
public FindRerankingProvidersResult findRerankingProviders() {
116+
DataAPIResponse res = runCommand(Command.create("findRerankingProviders"));
117+
return new FindRerankingProvidersResult(
118+
res.getStatusKeyAsMap("rerankingProviders",
119+
RerankProvider.class));
120+
}
121+
112122
/** {@inheritDoc} */
113123
@Override
114124
public Database getDatabase() {

astra-db-java/src/main/java/com/datastax/astra/client/admin/DatabaseAdmin.java

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,12 @@
2121
*/
2222

2323
import com.datastax.astra.client.core.options.BaseOptions;
24+
import com.datastax.astra.client.core.rerank.RerankProvider;
2425
import com.datastax.astra.client.databases.Database;
2526
import com.datastax.astra.client.core.commands.CommandRunner;
2627
import com.datastax.astra.client.core.vectorize.EmbeddingProvider;
2728
import com.datastax.astra.client.databases.commands.results.FindEmbeddingProvidersResult;
29+
import com.datastax.astra.client.databases.commands.results.FindRerankingProvidersResult;
2830
import com.datastax.astra.internal.utils.Assert;
2931

3032
import java.util.Set;
@@ -81,6 +83,24 @@ public interface DatabaseAdmin {
8183
*/
8284
FindEmbeddingProvidersResult findEmbeddingProviders();
8385

86+
/**
87+
* Retrieve the list of reranking providers available in the current database. Reranking providers are services
88+
* that sort a list of records based on an algorithm (e.g. BM25). This method returns a
89+
* {@link FindRerankingProvidersResult} wrapping the available {@link RerankProvider} definitions, allowing applications
90+
* to access and utilize the reranking services.
91+
*
92+
* <p>Example usage:</p>
93+
* <pre>
94+
* {@code
95+
* // Assuming 'databaseAdmin' is an instance of DatabaseAdmin
96+
* FindRerankingProvidersResult providers = databaseAdmin.findRerankingProviders();
97+
* }
98+
* </pre>
99+
* @return
100+
* result holding the available reranking providers
101+
*/
102+
FindRerankingProvidersResult findRerankingProviders();
103+
84104
/**
85105
* Asynchronously retrieves a stream of keyspaces names available in the current database. This method facilitates
86106
* non-blocking operations by allowing the application to continue executing other tasks while the list of keyspace

astra-db-java/src/main/java/com/datastax/astra/client/collections/Collection.java

Lines changed: 15 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@
5959
import com.datastax.astra.client.core.query.Filter;
6060
import com.datastax.astra.client.core.query.Filters;
6161
import com.datastax.astra.client.core.query.Projection;
62-
import com.datastax.astra.client.core.reranking.RerankResult;
62+
import com.datastax.astra.client.core.rerank.RerankResult;
6363
import com.datastax.astra.client.core.vector.DataAPIVector;
6464
import com.datastax.astra.client.databases.Database;
6565
import com.datastax.astra.client.exceptions.DataAPIException;
@@ -73,6 +73,7 @@
7373
import com.datastax.astra.internal.serdes.collections.DocumentSerializer;
7474
import com.datastax.astra.internal.serdes.tables.RowMapper;
7575
import com.datastax.astra.internal.utils.Assert;
76+
import com.datastax.astra.internal.utils.BetaPreview;
7677
import com.datastax.astra.internal.utils.EscapeUtils;
7778
import lombok.Getter;
7879
import lombok.extern.slf4j.Slf4j;
@@ -1082,31 +1083,32 @@ public Page<T> findPage(Filter filter, CollectionFindOptions options) {
10821083
* @return
10831084
* the find iterable interface
10841085
*/
1086+
@BetaPreview
10851087
public CollectionFindAndRerankCursor<T,T> findAndRerank(Filter filter, CollectionFindAndRerankOptions options) {
10861088
// No result type supplied: delegate to the typed overload with this collection's own document class.
return findAndRerank(filter, options, getDocumentClass());
10871089
}
10881090

1091+
/**
 * Creates a find-and-rerank cursor over this collection whose results are typed as {@code R}.
 *
 * @param filter
 *      filter handed to the cursor
 * @param options
 *      find-and-rerank options handed to the cursor
 * @param newRowType
 *      class handed to the cursor as the result type
 * @param <R>
 *      type of the results exposed by the cursor
 * @return
 *      a new {@link CollectionFindAndRerankCursor} bound to this collection
 */
@BetaPreview
10891092
public <R> CollectionFindAndRerankCursor<T, R> findAndRerank(Filter filter, CollectionFindAndRerankOptions options, Class<R> newRowType) {
10901093
return new CollectionFindAndRerankCursor<>(this, filter, options, newRowType);
10911094
}
10921095

1096+
@BetaPreview
10931097
public <R> Page<RerankResult<R>> findAndRerankPage(Filter filter, CollectionFindAndRerankOptions options, Class<R> newRowType) {
10941098
Command findAndRerankCommand = Command
10951099
.create("findAndRerank")
10961100
.withFilter(filter);
10971101
if (options != null) {
10981102
findAndRerankCommand
1099-
.withSort(options.getSortArray())
1100-
.withProjection(options.getProjectionArray())
1101-
.withOptions(new Document()
1102-
.appendIfNotNull("rerankOn", options.rerankOn())
1103-
.appendIfNotNull("limit", options.limit())
1104-
.appendIfNotNull("hybridProjection", options.hybridProjection().getValue())
1105-
.appendIfNotNull("hybridLimits", options.hybridLimits())
1106-
.appendIfNotNull(INPUT_INCLUDE_SORT_VECTOR, options.includeSortVector())
1107-
.appendIfNotNull(INPUT_INCLUDE_SIMILARITY, options.includeSimilarity())
1108-
)
1109-
;
1103+
.withSort(options.getSortArray())
1104+
.withProjection(options.getProjectionArray())
1105+
.withOptions(new Document()
1106+
.appendIfNotNull("rerankOn", options.rerankOn())
1107+
.appendIfNotNull("limit", options.limit())
1108+
.appendIfNotNull("hybridLimits", options.hybridLimits())
1109+
.appendIfNotNull(INPUT_INCLUDE_SORT_VECTOR, options.includeSortVector())
1110+
.appendIfNotNull(INPUT_INCLUDE_SCORES, options.includeScores())
1111+
.appendIfNotNull(INPUT_INCLUDE_SIMILARITY, options.includeSimilarity()));
11101112
}
11111113

11121114
// Responses MOCK for now
@@ -1139,10 +1141,6 @@ public <R> Page<RerankResult<R>> findAndRerankPage(Filter filter, CollectionFind
11391141
DocumentSerializer serializer = new DocumentSerializer();
11401142
R results1 = serializer.convertValue(document, newRowType);
11411143

1142-
// MAP WITH ROW FUNCTION
1143-
Row row = RowMapper.mapAsRow(document);
1144-
R result = RowMapper.mapFromRow(row, getSerializer(), newRowType);
1145-
11461144
// Getting associated document response
11471145
Document documentResponse = documentResponses.get(i);
11481146
Map<String, Double> scores = documentResponse.getMap("scores", String.class, Double.class);
@@ -1199,12 +1197,7 @@ public <R> Page<R> findPage(Filter filter, CollectionFindOptions options, Class<
11991197
return new Page<>(
12001198
apiResponse.getData().getNextPageState(),
12011199
apiResponse.getData().getDocuments().stream()
1202-
.map(d -> {
1203-
Row row = RowMapper.mapAsRow(d);
1204-
return mapFromRow(row, getSerializer(), newRowType);
1205-
})
1206-
// .map(d -> d.map(newRowType))
1207-
//.map(d -> RowMapper.mapFromRow(d, getSerializer(), newRowType))
1200+
.map(d -> d.map(newRowType))
12081201
.collect(Collectors.toList()), sortVector);
12091202
}
12101203

astra-db-java/src/main/java/com/datastax/astra/client/collections/commands/cursor/CollectionFindAndRerankCursor.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
import com.datastax.astra.client.core.query.Filter;
2727
import com.datastax.astra.client.core.query.Projection;
2828
import com.datastax.astra.client.core.query.Sort;
29-
import com.datastax.astra.client.core.reranking.RerankResult;
29+
import com.datastax.astra.client.core.rerank.RerankResult;
3030
import com.datastax.astra.internal.command.AbstractCursor;
3131
import lombok.Getter;
3232

astra-db-java/src/main/java/com/datastax/astra/client/collections/commands/cursor/CollectionFindCursor.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222

2323
import com.datastax.astra.client.collections.Collection;
2424
import com.datastax.astra.client.collections.commands.options.CollectionFindOptions;
25+
import com.datastax.astra.client.collections.definition.documents.Document;
2526
import com.datastax.astra.client.core.paging.CursorState;
2627
import com.datastax.astra.client.core.query.Filter;
2728
import com.datastax.astra.client.core.query.Projection;

astra-db-java/src/main/java/com/datastax/astra/client/collections/commands/options/CollectionFindAndRerankOptions.java

Lines changed: 1 addition & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@
2323
import com.datastax.astra.client.core.commands.CommandType;
2424
import com.datastax.astra.client.core.hybrid.Hybrid;
2525
import com.datastax.astra.client.core.hybrid.HybridLimits;
26-
import com.datastax.astra.client.core.hybrid.HybridProjection;
2726
import com.datastax.astra.client.core.options.BaseOptions;
2827
import com.datastax.astra.client.core.query.Projection;
2928
import com.datastax.astra.client.core.query.Sort;
@@ -70,7 +69,7 @@ public class CollectionFindAndRerankOptions extends BaseOptions<CollectionFindAn
7069
/**
7170
* Options for hybrid projection
7271
*/
73-
HybridProjection hybridProjection;
72+
Boolean includeScores;
7473

7574
/**
7675
* Flag to include sortVector in the result when operating a semantic search.
@@ -187,15 +186,5 @@ public CollectionFindAndRerankOptions rerankOn(String rerankOn) {
187186
return this;
188187
}
189188

190-
/**
191-
* Add a hybridProjection clause in the find block
192-
*
193-
* @param hybridProjection value for hybridProjection options
194-
* @return current command
195-
*/
196-
public CollectionFindAndRerankOptions hybridProjection(HybridProjection hybridProjection) {
197-
this.hybridProjection = hybridProjection;
198-
return this;
199-
}
200189

201190
}

0 commit comments

Comments
 (0)