Skip to content

Commit 527ddc6

Browse files
authored
fix wiki embeddings (#23)
* fix wiki embeddings: the old ones were deleted, which requires us to switch to 1024d vectors.
* dataset cache
1 parent 4385fad commit 527ddc6

File tree

11 files changed

+60
-546
lines changed

11 files changed

+60
-546
lines changed

cmd/nightly/datasource_cohere.go

Lines changed: 19 additions & 267 deletions
Large diffs are not rendered by default.

data.go

Lines changed: 27 additions & 265 deletions
Large diffs are not rendered by default.

main.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ func run(ctx context.Context, shutdown context.CancelFunc) error {
8484
// for queries and small upserts).
8585
executor := &TemplateExecutor{
8686
nextId: 0,
87-
vectors: RandomVectorSource(768),
87+
vectors: RandomVectorSource(1024),
8888
msmarco: &MSMarcoSource{},
8989
}
9090

@@ -142,7 +142,7 @@ func run(ctx context.Context, shutdown context.CancelFunc) error {
142142
if err != nil {
143143
return fmt.Errorf("failed to setup namespaces: %w", err)
144144
}
145-
executor.vectors = RandomVectorSource(768)
145+
executor.vectors = RandomVectorSource(1024)
146146

147147
// Wait until the largest namespace has been fully indexed,
148148
// i.e. we just dumped in a huge amount of documents
@@ -345,7 +345,7 @@ func setupNamespaces(
345345

346346
defer func() {
347347
executor.lock.Lock()
348-
executor.vectors = RandomVectorSource(768)
348+
executor.vectors = RandomVectorSource(1024)
349349
executor.lock.Unlock()
350350
}()
351351

templates/document_attrs.json.tmpl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"id": {{ id }},
3-
"vector": {{ vector 768 }},
3+
"vector": {{ vector 1024 }},
44
"attributes": {
55
"attr1": "{{ string_with_cardinality 1 10 }}",
66
"attr2": "{{ string 32 }}",
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
{
22
"id": {{ id }},
3-
"vector": {{ vector 768 }}
3+
"vector": {{ vector 1024 }}
44
}
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
# name=ANN (10M, 768dims) - No filter, strongly consistent
1+
# name=ANN (10M, 1024dims) - No filter, strongly consistent
22
{
33
"top_k": 10,
4-
"rank_by": ["vector", "ANN", {{ vector 768 }}]
4+
"rank_by": ["vector", "ANN", {{ vector 1024 }}]
55
}
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
# name=ANN (10M, 768dims) - No filter, cold
1+
# name=ANN (10M, 1024dims) - No filter, cold
22
{
33
"top_k": 10,
4-
"rank_by": ["vector", "ANN", {{ vector 768 }}],
4+
"rank_by": ["vector", "ANN", {{ vector 1024 }}],
55
"disable_cache": true
66
}

templates/nightly/10m_768dim/ann_eventual_consistency.tmpl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1-
# name=ANN (10M, 768dims) - No filter, eventually consistent
1+
# name=ANN (10M, 1024dims) - No filter, eventually consistent
22
{
33
"top_k": 10,
4-
"rank_by": ["vector", "ANN", {{ vector 768 }}],
4+
"rank_by": ["vector", "ANN", {{ vector 1024 }}],
55
"consistency": {
66
"level": "eventual"
77
}

templates/nightly/10m_768dim/document.tmpl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
{{define "row"}}
44
{
55
"id": {{ id }},
6-
"vector": {{ vector 768 }},
6+
"vector": {{ vector 1024 }},
77
"text": "{{ paragraph }}"
88
}
99
{{end}}

templates/query_attrs.json.tmpl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"top_k": 10,
3-
"rank_by": ["vector", "ANN", {{ vector 768 }}],
3+
"rank_by": ["vector", "ANN", {{ vector 1024 }}],
44
"filter": ["attr1", "Eq", "a"],
55
"include_attributes": [
66
"attr2",

0 commit comments

Comments
 (0)