11import 'package:ml_algo/src/common/serializable/serializable.dart' ;
22import 'package:ml_algo/src/retrieval/kd_tree/helpers/create_kd_tree.dart' ;
3+ import 'package:ml_algo/src/retrieval/kd_tree/helpers/create_kd_tree_from_iterable.dart' ;
34import 'package:ml_algo/src/retrieval/kd_tree/kd_tree_impl.dart' ;
45import 'package:ml_algo/src/retrieval/kd_tree/kd_tree_neighbour.dart' ;
6+ import 'package:ml_algo/src/retrieval/kd_tree/kd_tree_split_strategy.dart' ;
57import 'package:ml_dataframe/ml_dataframe.dart' ;
68import 'package:ml_linalg/dtype.dart' ;
79import 'package:ml_linalg/matrix.dart' ;
810import 'package:ml_linalg/vector.dart' ;
911
10- /// KD-tree - an algorithm that provides efficient data retrieval. It splits
11- /// the whole searching space into partitions in binary tree form which means
12+ /// KD-tree - an algorithm that provides efficient data retrieval by splitting
13+ /// the whole search space into partitions in the form of a binary tree, which means
1214/// that data querying on average will take O(log(n)) time
15+ ///
16+ /// One can use this algorithm to perform KNN-search. It's recommended to use
17+ /// [KDTree] when the number of the input data columns is much less than the
18+ /// number of rows of the data - in this case, the search will be more efficient
1319abstract class KDTree implements Serializable {
20+ /// [points] Data points which will be used to build the tree.
21+ ///
22+ /// [leafSize] A number of points on a leaf node.
23+ ///
24+ /// The bigger the number, the less efficient the search is. If [leafSize] is
25+ /// equal to the number of [points], a regular KNN-search will take place.
26+ ///
27+ /// Extremely small [leafSize] leads to ineffective memory usage since in
28+ /// this case a lot of kd-tree nodes will be allocated
29+ ///
30+ /// [dtype] A data type which will be used to convert raw data from [points]
31+ /// into internal numerical representation
32+ ///
33+ /// [splitStrategy] Describes how to choose a split dimension. Default value
34+ /// is [KDTreeSplitStrategy.largestVariance]
35+ ///
36+ /// If [splitStrategy] is [KDTreeSplitStrategy.largestVariance], the dimension
37+ /// with the largest variance will be chosen to split the data
38+ ///
39+ /// if [splitStrategy] is [KDTreeSplitStrategy.inOrder] , dimension for data
40+ /// splits will be chosen one by one in order
41+ ///
42+ /// [KDTreeSplitStrategy.largestVariance] provides more accurate KNN-search,
43+ /// but this strategy takes much more time to build the tree than [KDTreeSplitStrategy.inOrder]
1444 factory KDTree (DataFrame points,
15- {int leafSie = 10 , DType dtype = DType .float32}) =>
16- createKDTree (points, leafSie, dtype);
45+ {int leafSize = 1 ,
46+ DType dtype = DType .float32,
47+ KDTreeSplitStrategy splitStrategy =
48+ KDTreeSplitStrategy .largestVariance}) =>
49+ createKDTree (points, leafSize, dtype, splitStrategy);
50+
51+ /// [pointsSrc] Data points which will be used to build the tree.
52+ ///
53+ /// [leafSize] A number of points on a leaf node.
54+ ///
55+ /// The bigger the number, the less efficient the search is. If [leafSize] is
56+ /// equal to the number of [pointsSrc], a regular KNN-search will take place.
57+ ///
58+ /// Extremely small [leafSize] leads to ineffective memory usage since in
59+ /// this case a lot of kd-tree nodes will be allocated
60+ ///
61+ /// [dtype] A data type which will be used to convert raw data from [pointsSrc]
62+ /// into internal numerical representation
63+ ///
64+ /// [splitStrategy] Describes how to choose a split dimension. Default value
65+ /// is [KDTreeSplitStrategy.largestVariance]
66+ ///
67+ /// If [splitStrategy] is [KDTreeSplitStrategy.largestVariance], the dimension
68+ /// with the largest variance will be chosen to split the data
69+ ///
70+ /// if [splitStrategy] is [KDTreeSplitStrategy.inOrder] , dimension for data
71+ /// splits will be chosen one by one in order
72+ ///
73+ /// [KDTreeSplitStrategy.largestVariance] provides more accurate KNN-search,
74+ /// but this strategy takes much more time to build the tree than [KDTreeSplitStrategy.inOrder]
75+ factory KDTree .fromIterable (Iterable <Iterable <num >> pointsSrc,
76+ {int leafSize = 1 ,
77+ DType dtype = DType .float32,
78+ KDTreeSplitStrategy splitStrategy =
79+ KDTreeSplitStrategy .largestVariance}) =>
80+ createKDTreeFromIterable (pointsSrc, leafSize, dtype, splitStrategy);
1781
1882 factory KDTree .fromJson (Map <String , dynamic > json) =>
1983 KDTreeImpl .fromJson (json);
@@ -30,7 +94,7 @@ abstract class KDTree implements Serializable {
3094 /// this case a lot of kd-tree nodes will be allocated
3195 int get leafSize;
3296
33- /// Data type for [points] matrix
97+ /// Data type for internal representation of [points]
3498 DType get dtype;
3599
36100 /// Returns [k] nearest neighbours for [point]
0 commit comments