Skip to content

Commit 2916abe

Browse files
committed
Add Sample
1 parent ad10dc7 commit 2916abe

File tree

2 files changed

+67
-1
lines changed

2 files changed

+67
-1
lines changed
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
package com.datastax.astra.tool;
2+
3+
import com.datastax.astra.client.Collection;
4+
import com.datastax.astra.client.DataAPIClient;
5+
import com.datastax.astra.client.Database;
6+
import com.datastax.astra.client.model.CollectionOptions;
7+
import com.datastax.astra.client.model.Document;
8+
import com.datastax.astra.client.model.SimilarityMetric;
9+
import com.datastax.astra.tool.csv.CsvLoader;
10+
import com.datastax.astra.tool.csv.CsvRowMapper;
11+
import lombok.extern.slf4j.Slf4j;
12+
13+
/**
14+
* Load a CSV to Astra
15+
*/
16+
@Slf4j
17+
public class CsvLoaderWiki {
18+
19+
private static final String ASTRA_TOKEN = "<CHANGE_ME>";
20+
private static final String API_ENDPOINT = "<CHANGE_ME>";
21+
private static final String CSV_FILE = "<CHANGE_ME>";
22+
23+
public static void main(String[] args) throws Exception {
24+
// Get an empty Collection
25+
DataAPIClient client = new DataAPIClient(ASTRA_TOKEN);
26+
Database wikiDataDb = client.getDatabase(API_ENDPOINT);
27+
28+
Collection<Document> wiki = wikiDataDb.createCollection(
29+
"wiki",
30+
// Create collection with a Service in vectorize
31+
CollectionOptions.builder()
32+
.vectorDimension(768) // found from the CSV
33+
.vectorSimilarity(SimilarityMetric.COSINE)
34+
.build());
35+
36+
// Optionally delete all documents
37+
//wiki.deleteAll();
38+
39+
CsvLoader.load(CSV_FILE, wiki, new CsvRowMapper() {
40+
int line = 0;
41+
@Override
42+
public Document map(Document doc) {
43+
line++;
44+
if (line % 1000 == 0) {
45+
log.info("{} lines have been submitted ", line);
46+
}
47+
doc.put("_id", doc.get("", String.class));
48+
doc.remove("");
49+
String embedding = doc.get("embedding", String.class);
50+
embedding = embedding.replaceAll("\\[", "");
51+
embedding = embedding.replaceAll("\\]", "");
52+
embedding = embedding.replaceAll("\\\n", "");
53+
String[] parts = embedding.trim().split("\\s+");
54+
float[] floatArray = new float[parts.length];
55+
for (int i = 0; i < parts.length; i++) {
56+
floatArray[i] = Float.parseFloat(parts[i]);
57+
}
58+
doc.put("$vector", floatArray);
59+
doc.remove("embedding");
60+
return doc;
61+
}
62+
});
63+
64+
}
65+
66+
}

tools/src/main/java/com/datastax/astra/tool/csv/CsvLoaderSettings.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ public class CsvLoaderSettings {
99

1010
private static final int THREAD_POOL_SIZE = 5;
1111

12-
private static final int TIMEOUT = 180;
12+
private static final int TIMEOUT = 1800;
1313

1414
@Builder.Default
1515
int batchSize = BATCH_SIZE;;

0 commit comments

Comments
 (0)