Skip to content

Commit 034e19a

Browse files
nnethercott authored and irevoire committed
double trees conservatively
1 parent 969f8e2 commit 034e19a

File tree

2 files changed

+47
-41
lines changed

2 files changed

+47
-41
lines changed

src/tests/writer.rs

Lines changed: 46 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -49,33 +49,33 @@ fn guess_right_number_of_tree_while_growing() {
4949

5050
assert_snapshot!(quick_target(768, &b1), @"1");
5151
assert_snapshot!(quick_target(768, &b10), @"10");
52-
assert_snapshot!(quick_target(768, &b100), @"30");
53-
assert_snapshot!(quick_target(768, &b1000), @"60");
54-
assert_snapshot!(quick_target(768, &b10_000), @"119");
55-
assert_snapshot!(quick_target(768, &b100_000), @"237");
56-
assert_snapshot!(quick_target(768, &b1_000_000), @"473");
57-
assert_snapshot!(quick_target(768, &b10_000_000), @"946");
58-
assert_snapshot!(quick_target(768, &b100_000_000), @"1892");
52+
assert_snapshot!(quick_target(768, &b100), @"60");
53+
assert_snapshot!(quick_target(768, &b1000), @"119");
54+
assert_snapshot!(quick_target(768, &b10_000), @"237");
55+
assert_snapshot!(quick_target(768, &b100_000), @"473");
56+
assert_snapshot!(quick_target(768, &b1_000_000), @"946");
57+
assert_snapshot!(quick_target(768, &b10_000_000), @"1892");
58+
assert_snapshot!(quick_target(768, &b100_000_000), @"3784");
5959

6060
assert_snapshot!(quick_target(1512, &b1), @"1");
6161
assert_snapshot!(quick_target(1512, &b10), @"10");
62-
assert_snapshot!(quick_target(1512, &b100), @"37");
63-
assert_snapshot!(quick_target(1512, &b1000), @"73");
64-
assert_snapshot!(quick_target(1512, &b10_000), @"145");
65-
assert_snapshot!(quick_target(1512, &b100_000), @"290");
66-
assert_snapshot!(quick_target(1512, &b1_000_000), @"580");
67-
assert_snapshot!(quick_target(1512, &b10_000_000), @"1160");
68-
assert_snapshot!(quick_target(1512, &b100_000_000), @"2320");
62+
assert_snapshot!(quick_target(1512, &b100), @"73");
63+
assert_snapshot!(quick_target(1512, &b1000), @"145");
64+
assert_snapshot!(quick_target(1512, &b10_000), @"290");
65+
assert_snapshot!(quick_target(1512, &b100_000), @"580");
66+
assert_snapshot!(quick_target(1512, &b1_000_000), @"1160");
67+
assert_snapshot!(quick_target(1512, &b10_000_000), @"2320");
68+
assert_snapshot!(quick_target(1512, &b100_000_000), @"4639");
6969

7070
assert_snapshot!(quick_target(3072, &b1), @"1");
7171
assert_snapshot!(quick_target(3072, &b10), @"10");
72-
assert_snapshot!(quick_target(3072, &b100), @"45");
73-
assert_snapshot!(quick_target(3072, &b1000), @"90");
74-
assert_snapshot!(quick_target(3072, &b10_000), @"180");
75-
assert_snapshot!(quick_target(3072, &b100_000), @"359");
76-
assert_snapshot!(quick_target(3072, &b1_000_000), @"718");
77-
assert_snapshot!(quick_target(3072, &b10_000_000), @"1436");
78-
assert_snapshot!(quick_target(3072, &b100_000_000), @"2872");
72+
assert_snapshot!(quick_target(3072, &b100), @"90");
73+
assert_snapshot!(quick_target(3072, &b1000), @"180");
74+
assert_snapshot!(quick_target(3072, &b10_000), @"359");
75+
assert_snapshot!(quick_target(3072, &b100_000), @"718");
76+
assert_snapshot!(quick_target(3072, &b1_000_000), @"1436");
77+
assert_snapshot!(quick_target(3072, &b10_000_000), @"2872");
78+
assert_snapshot!(quick_target(3072, &b100_000_000), @"5743");
7979
}
8080

8181
#[ignore = "strange test"]
@@ -980,17 +980,23 @@ fn delete_extraneous_tree() {
980980
insta::assert_snapshot!(handle, @r#"
981981
==================
982982
Dumping index 0
983-
Root: Metadata { dimensions: 4, items: RoaringBitmap<[0, 1, 2, 3, 4]>, roots: [0, 1, 2], distance: "euclidean" }
983+
Root: Metadata { dimensions: 4, items: RoaringBitmap<[0, 1, 2, 3, 4]>, roots: [0, 1, 2, 3, 4], distance: "euclidean" }
984984
Version: Version { major: 0, minor: 7, patch: 0 }
985-
Tree 0: SplitPlaneNormal(SplitPlaneNormal<euclidean> { left: 7, right: 8, normal: Leaf { header: NodeHeaderEuclidean { bias: "1.5952" }, vector: [-1.0000, 0.0000, 0.0000, 0.0000] } })
986-
Tree 1: SplitPlaneNormal(SplitPlaneNormal<euclidean> { left: 5, right: 6, normal: Leaf { header: NodeHeaderEuclidean { bias: "-2.2778" }, vector: [1.0000, 0.0000, 0.0000, 0.0000] } })
987-
Tree 2: SplitPlaneNormal(SplitPlaneNormal<euclidean> { left: 3, right: 4, normal: Leaf { header: NodeHeaderEuclidean { bias: "-2.3125" }, vector: [1.0000, 0.0000, 0.0000, 0.0000] } })
988-
Tree 3: Descendants(Descendants { descendants: [0, 1, 2] })
989-
Tree 4: Descendants(Descendants { descendants: [3, 4] })
990-
Tree 5: Descendants(Descendants { descendants: [0, 1, 2] })
991-
Tree 6: Descendants(Descendants { descendants: [3, 4] })
992-
Tree 7: Descendants(Descendants { descendants: [2, 3, 4] })
993-
Tree 8: Descendants(Descendants { descendants: [0, 1] })
985+
Tree 0: SplitPlaneNormal(SplitPlaneNormal<euclidean> { left: 13, right: 14, normal: Leaf { header: NodeHeaderEuclidean { bias: "1.5952" }, vector: [-1.0000, 0.0000, 0.0000, 0.0000] } })
986+
Tree 1: SplitPlaneNormal(SplitPlaneNormal<euclidean> { left: 11, right: 12, normal: Leaf { header: NodeHeaderEuclidean { bias: "-2.2778" }, vector: [1.0000, 0.0000, 0.0000, 0.0000] } })
987+
Tree 2: SplitPlaneNormal(SplitPlaneNormal<euclidean> { left: 9, right: 10, normal: Leaf { header: NodeHeaderEuclidean { bias: "-2.3125" }, vector: [1.0000, 0.0000, 0.0000, 0.0000] } })
988+
Tree 3: SplitPlaneNormal(SplitPlaneNormal<euclidean> { left: 7, right: 8, normal: Leaf { header: NodeHeaderEuclidean { bias: "-1.8857" }, vector: [1.0000, 0.0000, 0.0000, 0.0000] } })
989+
Tree 4: SplitPlaneNormal(SplitPlaneNormal<euclidean> { left: 5, right: 6, normal: Leaf { header: NodeHeaderEuclidean { bias: "1.7500" }, vector: [-1.0000, 0.0000, 0.0000, 0.0000] } })
990+
Tree 5: Descendants(Descendants { descendants: [2, 3, 4] })
991+
Tree 6: Descendants(Descendants { descendants: [0, 1] })
992+
Tree 7: Descendants(Descendants { descendants: [0, 1] })
993+
Tree 8: Descendants(Descendants { descendants: [2, 3, 4] })
994+
Tree 9: Descendants(Descendants { descendants: [0, 1, 2] })
995+
Tree 10: Descendants(Descendants { descendants: [3, 4] })
996+
Tree 11: Descendants(Descendants { descendants: [0, 1, 2] })
997+
Tree 12: Descendants(Descendants { descendants: [3, 4] })
998+
Tree 13: Descendants(Descendants { descendants: [2, 3, 4] })
999+
Tree 14: Descendants(Descendants { descendants: [0, 1] })
9941000
Item 0: Leaf(Leaf { header: NodeHeaderEuclidean { bias: "0.0000" }, vector: [0.0000, 0.0000, 0.0000, 0.0000] })
9951001
Item 1: Leaf(Leaf { header: NodeHeaderEuclidean { bias: "0.0000" }, vector: [1.0000, 0.0000, 0.0000, 0.0000] })
9961002
Item 2: Leaf(Leaf { header: NodeHeaderEuclidean { bias: "0.0000" }, vector: [2.0000, 0.0000, 0.0000, 0.0000] })
@@ -1008,12 +1014,12 @@ fn delete_extraneous_tree() {
10081014
Dumping index 0
10091015
Root: Metadata { dimensions: 2, items: RoaringBitmap<[0, 1, 2, 3, 4]>, roots: [1, 2], distance: "euclidean" }
10101016
Version: Version { major: 0, minor: 7, patch: 0 }
1011-
Tree 1: SplitPlaneNormal(SplitPlaneNormal<euclidean> { left: 5, right: 6, normal: Leaf { header: NodeHeaderEuclidean { bias: "-2.2778" }, vector: [1.0000, 0.0000, 0.0000, 0.0000] } })
1012-
Tree 2: SplitPlaneNormal(SplitPlaneNormal<euclidean> { left: 3, right: 4, normal: Leaf { header: NodeHeaderEuclidean { bias: "-2.3125" }, vector: [1.0000, 0.0000, 0.0000, 0.0000] } })
1013-
Tree 3: Descendants(Descendants { descendants: [0, 1, 2] })
1014-
Tree 4: Descendants(Descendants { descendants: [3, 4] })
1015-
Tree 5: Descendants(Descendants { descendants: [0, 1, 2] })
1016-
Tree 6: Descendants(Descendants { descendants: [3, 4] })
1017+
Tree 1: SplitPlaneNormal(SplitPlaneNormal<euclidean> { left: 11, right: 12, normal: Leaf { header: NodeHeaderEuclidean { bias: "-2.2778" }, vector: [1.0000, 0.0000, 0.0000, 0.0000] } })
1018+
Tree 2: SplitPlaneNormal(SplitPlaneNormal<euclidean> { left: 9, right: 10, normal: Leaf { header: NodeHeaderEuclidean { bias: "-2.3125" }, vector: [1.0000, 0.0000, 0.0000, 0.0000] } })
1019+
Tree 9: Descendants(Descendants { descendants: [0, 1, 2] })
1020+
Tree 10: Descendants(Descendants { descendants: [3, 4] })
1021+
Tree 11: Descendants(Descendants { descendants: [0, 1, 2] })
1022+
Tree 12: Descendants(Descendants { descendants: [3, 4] })
10171023
Item 0: Leaf(Leaf { header: NodeHeaderEuclidean { bias: "0.0000" }, vector: [0.0000, 0.0000, 0.0000, 0.0000] })
10181024
Item 1: Leaf(Leaf { header: NodeHeaderEuclidean { bias: "0.0000" }, vector: [1.0000, 0.0000, 0.0000, 0.0000] })
10191025
Item 2: Leaf(Leaf { header: NodeHeaderEuclidean { bias: "0.0000" }, vector: [2.0000, 0.0000, 0.0000, 0.0000] })
@@ -1031,9 +1037,9 @@ fn delete_extraneous_tree() {
10311037
Dumping index 0
10321038
Root: Metadata { dimensions: 2, items: RoaringBitmap<[0, 1, 2, 3, 4]>, roots: [2], distance: "euclidean" }
10331039
Version: Version { major: 0, minor: 7, patch: 0 }
1034-
Tree 2: SplitPlaneNormal(SplitPlaneNormal<euclidean> { left: 3, right: 4, normal: Leaf { header: NodeHeaderEuclidean { bias: "-2.3125" }, vector: [1.0000, 0.0000, 0.0000, 0.0000] } })
1035-
Tree 3: Descendants(Descendants { descendants: [0, 1, 2] })
1036-
Tree 4: Descendants(Descendants { descendants: [3, 4] })
1040+
Tree 2: SplitPlaneNormal(SplitPlaneNormal<euclidean> { left: 9, right: 10, normal: Leaf { header: NodeHeaderEuclidean { bias: "-2.3125" }, vector: [1.0000, 0.0000, 0.0000, 0.0000] } })
1041+
Tree 9: Descendants(Descendants { descendants: [0, 1, 2] })
1042+
Tree 10: Descendants(Descendants { descendants: [3, 4] })
10371043
Item 0: Leaf(Leaf { header: NodeHeaderEuclidean { bias: "0.0000" }, vector: [0.0000, 0.0000, 0.0000, 0.0000] })
10381044
Item 1: Leaf(Leaf { header: NodeHeaderEuclidean { bias: "0.0000" }, vector: [1.0000, 0.0000, 0.0000, 0.0000] })
10391045
Item 2: Leaf(Leaf { header: NodeHeaderEuclidean { bias: "0.0000" }, vector: [2.0000, 0.0000, 0.0000, 0.0000] })

src/writer.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1377,7 +1377,7 @@ pub(crate) fn target_n_trees(
13771377
//
13781378
// To account for different embedding dimensions we notice that most providers offer
13791379
// embeddings on ~O(10^3) and let `b` = log10(dim)
1380-
let exp = (item_indices.len() as f64).log10() + (dimensions as f64).log10();
1380+
let exp = (item_indices.len() as f64).log10() + (dimensions as f64).log10() + 1.0;
13811381
let mut nb_trees = 2f64.powf(exp).ceil() as u64;
13821382
nb_trees = nb_trees.min(item_indices.len());
13831383

0 commit comments

Comments
 (0)