File tree Expand file tree Collapse file tree 1 file changed +2
-7
lines changed
Expand file tree Collapse file tree 1 file changed +2
-7
lines changed Original file line number Diff line number Diff line change @@ -1366,17 +1366,12 @@ pub(crate) fn target_n_trees(
13661366 // In the case we never made any tree we can roughly guess how many trees we want to build in total
13671367 None => {
13681368 // We notice that increasing the dataset size by an order of magnitude requires
1369- // doubling the number of trees to saturate recall. [link pr]
1369+ // doubling the number of trees to saturate recall.
13701370 // That relation looks like: n_trees = 2^{log10(item_indices.len()) + b}, with an adjustment
13711371 // factor b to center the trees.
13721372 //
1373- // For b = 3 we get :
1374- // - item_indices.len() = 10^5 => n_trees = 256
1375- // - item_indices.len() = 10^6 => n_trees = 512
1376- // - item_indices.len() = 10^7 => n_trees = 1024
1377- //
13781373 // To account for different embedding dimensions we notice that most providers offer
1379- // embedings on ~O(10^3) and let `b` = log10(dim)
1374+ // embeddings with ~O(10^3) dimensions, and let `b` = log10(dim) + 1
13801375 let exp = ( item_indices. len ( ) as f64 ) . log10 ( ) + ( dimensions as f64 ) . log10 ( ) + 1.0 ;
13811376 let mut nb_trees = 2f64 . powf ( exp) . ceil ( ) as u64 ;
13821377 nb_trees = nb_trees. min ( item_indices. len ( ) ) ;
You can’t perform that action at this time.
0 commit comments