Skip to content

Commit c36554d

Browse files
author
nnethercott
committed
double trees conservatively
1 parent 9870e41 commit c36554d

File tree

2 files changed

+46
-40
lines changed

2 files changed

+46
-40
lines changed

src/tests/writer.rs

Lines changed: 45 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -49,33 +49,33 @@ fn guess_right_number_of_tree_while_growing() {
4949

5050
assert_snapshot!(quick_target(768, &b1), @"1");
5151
assert_snapshot!(quick_target(768, &b10), @"10");
52-
assert_snapshot!(quick_target(768, &b100), @"30");
53-
assert_snapshot!(quick_target(768, &b1000), @"60");
54-
assert_snapshot!(quick_target(768, &b10_000), @"119");
55-
assert_snapshot!(quick_target(768, &b100_000), @"237");
56-
assert_snapshot!(quick_target(768, &b1_000_000), @"473");
57-
assert_snapshot!(quick_target(768, &b10_000_000), @"946");
58-
assert_snapshot!(quick_target(768, &b100_000_000), @"1892");
52+
assert_snapshot!(quick_target(768, &b100), @"60");
53+
assert_snapshot!(quick_target(768, &b1000), @"119");
54+
assert_snapshot!(quick_target(768, &b10_000), @"237");
55+
assert_snapshot!(quick_target(768, &b100_000), @"473");
56+
assert_snapshot!(quick_target(768, &b1_000_000), @"946");
57+
assert_snapshot!(quick_target(768, &b10_000_000), @"1892");
58+
assert_snapshot!(quick_target(768, &b100_000_000), @"3784");
5959

6060
assert_snapshot!(quick_target(1512, &b1), @"1");
6161
assert_snapshot!(quick_target(1512, &b10), @"10");
62-
assert_snapshot!(quick_target(1512, &b100), @"37");
63-
assert_snapshot!(quick_target(1512, &b1000), @"73");
64-
assert_snapshot!(quick_target(1512, &b10_000), @"145");
65-
assert_snapshot!(quick_target(1512, &b100_000), @"290");
66-
assert_snapshot!(quick_target(1512, &b1_000_000), @"580");
67-
assert_snapshot!(quick_target(1512, &b10_000_000), @"1160");
68-
assert_snapshot!(quick_target(1512, &b100_000_000), @"2320");
62+
assert_snapshot!(quick_target(1512, &b100), @"73");
63+
assert_snapshot!(quick_target(1512, &b1000), @"145");
64+
assert_snapshot!(quick_target(1512, &b10_000), @"290");
65+
assert_snapshot!(quick_target(1512, &b100_000), @"580");
66+
assert_snapshot!(quick_target(1512, &b1_000_000), @"1160");
67+
assert_snapshot!(quick_target(1512, &b10_000_000), @"2320");
68+
assert_snapshot!(quick_target(1512, &b100_000_000), @"4639");
6969

7070
assert_snapshot!(quick_target(3072, &b1), @"1");
7171
assert_snapshot!(quick_target(3072, &b10), @"10");
72-
assert_snapshot!(quick_target(3072, &b100), @"45");
73-
assert_snapshot!(quick_target(3072, &b1000), @"90");
74-
assert_snapshot!(quick_target(3072, &b10_000), @"180");
75-
assert_snapshot!(quick_target(3072, &b100_000), @"359");
76-
assert_snapshot!(quick_target(3072, &b1_000_000), @"718");
77-
assert_snapshot!(quick_target(3072, &b10_000_000), @"1436");
78-
assert_snapshot!(quick_target(3072, &b100_000_000), @"2872");
72+
assert_snapshot!(quick_target(3072, &b100), @"90");
73+
assert_snapshot!(quick_target(3072, &b1000), @"180");
74+
assert_snapshot!(quick_target(3072, &b10_000), @"359");
75+
assert_snapshot!(quick_target(3072, &b100_000), @"718");
76+
assert_snapshot!(quick_target(3072, &b1_000_000), @"1436");
77+
assert_snapshot!(quick_target(3072, &b10_000_000), @"2872");
78+
assert_snapshot!(quick_target(3072, &b100_000_000), @"5743");
7979
}
8080

8181
#[ignore = "strange test"]
@@ -970,17 +970,23 @@ fn delete_extraneous_tree() {
970970
insta::assert_snapshot!(handle, @r#"
971971
==================
972972
Dumping index 0
973-
Root: Metadata { dimensions: 4, items: RoaringBitmap<[0, 1, 2, 3, 4]>, roots: [0, 1, 2], distance: "euclidean" }
973+
Root: Metadata { dimensions: 4, items: RoaringBitmap<[0, 1, 2, 3, 4]>, roots: [0, 1, 2, 3, 4], distance: "euclidean" }
974974
Version: Version { major: 0, minor: 7, patch: 0 }
975-
Tree 0: SplitPlaneNormal(SplitPlaneNormal<euclidean> { left: 3, right: 4, normal: Leaf { header: NodeHeaderEuclidean { bias: "-1.9500" }, vector: [1.0000, 0.0000, 0.0000, 0.0000] } })
976-
Tree 1: SplitPlaneNormal(SplitPlaneNormal<euclidean> { left: 6, right: 7, normal: Leaf { header: NodeHeaderEuclidean { bias: "1.7333" }, vector: [-1.0000, 0.0000, 0.0000, 0.0000] } })
977-
Tree 2: SplitPlaneNormal(SplitPlaneNormal<euclidean> { left: 9, right: 10, normal: Leaf { header: NodeHeaderEuclidean { bias: "1.6111" }, vector: [-1.0000, 0.0000, 0.0000, 0.0000] } })
978-
Tree 3: Descendants(Descendants { descendants: [0, 1] })
979-
Tree 4: Descendants(Descendants { descendants: [2, 3, 4] })
975+
Tree 0: SplitPlaneNormal(SplitPlaneNormal<euclidean> { left: 5, right: 6, normal: Leaf { header: NodeHeaderEuclidean { bias: "-1.9500" }, vector: [1.0000, 0.0000, 0.0000, 0.0000] } })
976+
Tree 1: SplitPlaneNormal(SplitPlaneNormal<euclidean> { left: 8, right: 9, normal: Leaf { header: NodeHeaderEuclidean { bias: "1.7333" }, vector: [-1.0000, 0.0000, 0.0000, 0.0000] } })
977+
Tree 2: SplitPlaneNormal(SplitPlaneNormal<euclidean> { left: 11, right: 12, normal: Leaf { header: NodeHeaderEuclidean { bias: "1.6111" }, vector: [-1.0000, 0.0000, 0.0000, 0.0000] } })
978+
Tree 3: SplitPlaneNormal(SplitPlaneNormal<euclidean> { left: 14, right: 15, normal: Leaf { header: NodeHeaderEuclidean { bias: "1.5000" }, vector: [-1.0000, 0.0000, 0.0000, 0.0000] } })
979+
Tree 4: SplitPlaneNormal(SplitPlaneNormal<euclidean> { left: 17, right: 18, normal: Leaf { header: NodeHeaderEuclidean { bias: "1.6714" }, vector: [-1.0000, 0.0000, 0.0000, 0.0000] } })
980+
Tree 5: Descendants(Descendants { descendants: [0, 1] })
980981
Tree 6: Descendants(Descendants { descendants: [2, 3, 4] })
981-
Tree 7: Descendants(Descendants { descendants: [0, 1] })
982-
Tree 9: Descendants(Descendants { descendants: [2, 3, 4] })
983-
Tree 10: Descendants(Descendants { descendants: [0, 1] })
982+
Tree 8: Descendants(Descendants { descendants: [2, 3, 4] })
983+
Tree 9: Descendants(Descendants { descendants: [0, 1] })
984+
Tree 11: Descendants(Descendants { descendants: [2, 3, 4] })
985+
Tree 12: Descendants(Descendants { descendants: [0, 1] })
986+
Tree 14: Descendants(Descendants { descendants: [2, 3, 4] })
987+
Tree 15: Descendants(Descendants { descendants: [0, 1] })
988+
Tree 17: Descendants(Descendants { descendants: [2, 3, 4] })
989+
Tree 18: Descendants(Descendants { descendants: [0, 1] })
984990
Item 0: Leaf(Leaf { header: NodeHeaderEuclidean { bias: "0.0000" }, vector: [0.0000, 0.0000, 0.0000, 0.0000] })
985991
Item 1: Leaf(Leaf { header: NodeHeaderEuclidean { bias: "0.0000" }, vector: [1.0000, 0.0000, 0.0000, 0.0000] })
986992
Item 2: Leaf(Leaf { header: NodeHeaderEuclidean { bias: "0.0000" }, vector: [2.0000, 0.0000, 0.0000, 0.0000] })
@@ -998,12 +1004,12 @@ fn delete_extraneous_tree() {
9981004
Dumping index 0
9991005
Root: Metadata { dimensions: 2, items: RoaringBitmap<[0, 1, 2, 3, 4]>, roots: [1, 2], distance: "euclidean" }
10001006
Version: Version { major: 0, minor: 7, patch: 0 }
1001-
Tree 1: SplitPlaneNormal(SplitPlaneNormal<euclidean> { left: 6, right: 7, normal: Leaf { header: NodeHeaderEuclidean { bias: "1.7333" }, vector: [-1.0000, 0.0000, 0.0000, 0.0000] } })
1002-
Tree 2: SplitPlaneNormal(SplitPlaneNormal<euclidean> { left: 9, right: 10, normal: Leaf { header: NodeHeaderEuclidean { bias: "1.6111" }, vector: [-1.0000, 0.0000, 0.0000, 0.0000] } })
1003-
Tree 6: Descendants(Descendants { descendants: [2, 3, 4] })
1004-
Tree 7: Descendants(Descendants { descendants: [0, 1] })
1005-
Tree 9: Descendants(Descendants { descendants: [2, 3, 4] })
1006-
Tree 10: Descendants(Descendants { descendants: [0, 1] })
1007+
Tree 1: SplitPlaneNormal(SplitPlaneNormal<euclidean> { left: 8, right: 9, normal: Leaf { header: NodeHeaderEuclidean { bias: "1.7333" }, vector: [-1.0000, 0.0000, 0.0000, 0.0000] } })
1008+
Tree 2: SplitPlaneNormal(SplitPlaneNormal<euclidean> { left: 11, right: 12, normal: Leaf { header: NodeHeaderEuclidean { bias: "1.6111" }, vector: [-1.0000, 0.0000, 0.0000, 0.0000] } })
1009+
Tree 8: Descendants(Descendants { descendants: [2, 3, 4] })
1010+
Tree 9: Descendants(Descendants { descendants: [0, 1] })
1011+
Tree 11: Descendants(Descendants { descendants: [2, 3, 4] })
1012+
Tree 12: Descendants(Descendants { descendants: [0, 1] })
10071013
Item 0: Leaf(Leaf { header: NodeHeaderEuclidean { bias: "0.0000" }, vector: [0.0000, 0.0000, 0.0000, 0.0000] })
10081014
Item 1: Leaf(Leaf { header: NodeHeaderEuclidean { bias: "0.0000" }, vector: [1.0000, 0.0000, 0.0000, 0.0000] })
10091015
Item 2: Leaf(Leaf { header: NodeHeaderEuclidean { bias: "0.0000" }, vector: [2.0000, 0.0000, 0.0000, 0.0000] })
@@ -1021,9 +1027,9 @@ fn delete_extraneous_tree() {
10211027
Dumping index 0
10221028
Root: Metadata { dimensions: 2, items: RoaringBitmap<[0, 1, 2, 3, 4]>, roots: [2], distance: "euclidean" }
10231029
Version: Version { major: 0, minor: 7, patch: 0 }
1024-
Tree 2: SplitPlaneNormal(SplitPlaneNormal<euclidean> { left: 9, right: 10, normal: Leaf { header: NodeHeaderEuclidean { bias: "1.6111" }, vector: [-1.0000, 0.0000, 0.0000, 0.0000] } })
1025-
Tree 9: Descendants(Descendants { descendants: [2, 3, 4] })
1026-
Tree 10: Descendants(Descendants { descendants: [0, 1] })
1030+
Tree 2: SplitPlaneNormal(SplitPlaneNormal<euclidean> { left: 11, right: 12, normal: Leaf { header: NodeHeaderEuclidean { bias: "1.6111" }, vector: [-1.0000, 0.0000, 0.0000, 0.0000] } })
1031+
Tree 11: Descendants(Descendants { descendants: [2, 3, 4] })
1032+
Tree 12: Descendants(Descendants { descendants: [0, 1] })
10271033
Item 0: Leaf(Leaf { header: NodeHeaderEuclidean { bias: "0.0000" }, vector: [0.0000, 0.0000, 0.0000, 0.0000] })
10281034
Item 1: Leaf(Leaf { header: NodeHeaderEuclidean { bias: "0.0000" }, vector: [1.0000, 0.0000, 0.0000, 0.0000] })
10291035
Item 2: Leaf(Leaf { header: NodeHeaderEuclidean { bias: "0.0000" }, vector: [2.0000, 0.0000, 0.0000, 0.0000] })

src/writer.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1251,7 +1251,7 @@ pub(crate) fn target_n_trees(
12511251
//
12521252
// To account for different embedding dimensions we notice that most providers offer
12531253
// embeddings on ~O(10^3) and let `b` = log10(dim)
1254-
let exp = (item_indices.len() as f64).log10() + (dimensions as f64).log10();
1254+
let exp = (item_indices.len() as f64).log10() + (dimensions as f64).log10() + 1.0;
12551255
let mut nb_trees = 2f64.powf(exp).ceil() as u64;
12561256
nb_trees = nb_trees.min(item_indices.len());
12571257

0 commit comments

Comments
 (0)