@@ -47,7 +47,7 @@ def ingest(
4747 storage_version : str = STORAGE_VERSION ,
4848 verbose : bool = False ,
4949 trace_id : Optional [str ] = None ,
50- use_sklearn : bool = False ,
50+ use_sklearn : bool = True ,
5151 mode : Mode = Mode .LOCAL ,
5252 ** kwargs ,
5353):
@@ -129,7 +129,7 @@ def ingest(
129129 trace ID for logging, defaults to None
130130 use_sklearn: bool
131131 Whether to use scikit-learn's implementation of k-means clustering instead of
132- tiledb.vector_search's. Defaults to false.
132+ tiledb.vector_search's. Defaults to true.
133133 mode: Mode
134134 execution mode, defaults to LOCAL; use BATCH for distributed execution
135135 """
@@ -933,7 +933,7 @@ def centralised_kmeans(
933933 config : Optional [Mapping [str , Any ]] = None ,
934934 verbose : bool = False ,
935935 trace_id : Optional [str ] = None ,
936- use_sklearn : bool = False
936+ use_sklearn : bool = True
937937 ):
938938 from sklearn .cluster import KMeans
939939
@@ -1044,7 +1044,7 @@ def assign_points_and_partial_new_centroids(
10441044 config : Optional [Mapping [str , Any ]] = None ,
10451045 verbose : bool = False ,
10461046 trace_id : Optional [str ] = None ,
1047- use_sklearn : bool = False ,
1047+ use_sklearn : bool = True ,
10481048 ):
10491049 import tiledb .cloud
10501050 from sklearn .cluster import KMeans
@@ -1692,7 +1692,7 @@ def create_ingestion_dag(
16921692 config : Optional [Mapping [str , Any ]] = None ,
16931693 verbose : bool = False ,
16941694 trace_id : Optional [str ] = None ,
1695- use_sklearn : bool = False ,
1695+ use_sklearn : bool = True ,
16961696 mode : Mode = Mode .LOCAL ,
16971697 ) -> dag .DAG :
16981698 if mode == Mode .BATCH :
0 commit comments