@@ -1684,10 +1684,6 @@ def ingest_vectors_udf(
16841684 trace_id : Optional [str ] = None ,
16851685 distance_metric : vspy .DistanceMetric = vspy .DistanceMetric .SUM_OF_SQUARES ,
16861686 ):
1687- import os
1688- import random
1689- import tempfile
1690-
16911687 import tiledb .cloud
16921688 from tiledb .vector_search .module import StdVector_u64
16931689 from tiledb .vector_search .module import array_to_matrix
@@ -1702,11 +1698,6 @@ def ingest_vectors_udf(
17021698 partial_write_array_ids_uri = partial_write_array_group [IDS_ARRAY_NAME ].uri
17031699 partial_write_array_parts_uri = partial_write_array_group [PARTS_ARRAY_NAME ].uri
17041700 partial_write_array_index_uri = partial_write_array_group [INDEX_ARRAY_NAME ].uri
1705- # Temporary solution until `ivf_index` library change gets released.
1706- # TODO(nikos) remove this when the `partition_start` parameter of `ivf_index` gets released.
1707- partial_write_array_index_array = tiledb .open (
1708- partial_write_array_index_uri , "w" , timestamp = index_timestamp
1709- )
17101701
17111702 for part in range (start , end , batch ):
17121703 part_end = part + batch
@@ -1715,32 +1706,7 @@ def ingest_vectors_udf(
17151706
17161707 str (part ) + "-" + str (part_end )
17171708 part_id = int (part / batch )
1718-
1719- # Temporary solution until `ivf_index` library change gets released. We create a local disk
1720- # temporary array to hold the partial indices and write them to the respective range in the main array.
1721- # TODO(nikos) remove this when the `partition_start` parameter of `ivf_index` gets released.
1722- partial_write_array_index_tmp_uri = os .path .join (
1723- tempfile .gettempdir (),
1724- f"{ random .randint (0 ,MAX_INT32 )} _{ part_id } " ,
1725- )
1726- index_array_rows_dim = tiledb .Dim (
1727- name = "rows" ,
1728- domain = (0 , MAX_INT32 ),
1729- tile = 100000 ,
1730- dtype = np .dtype (np .int32 ),
1731- )
1732- index_array_dom = tiledb .Domain (index_array_rows_dim )
1733- index_attr = tiledb .Attr (
1734- name = "values" ,
1735- dtype = np .dtype (np .uint64 ),
1736- )
1737- index_schema = tiledb .ArraySchema (
1738- domain = index_array_dom ,
1739- sparse = False ,
1740- attrs = [index_attr ],
1741- )
1742- tiledb .Array .create (partial_write_array_index_tmp_uri , index_schema )
1743- partition_start = part_id * (partitions + 1 )
1709+ part_id * (partitions + 1 )
17441710
17451711 logger .debug ("Input vectors start_pos: %d, end_pos: %d" , part , part_end )
17461712 updated_ids = read_updated_ids (
@@ -1758,12 +1724,11 @@ def ingest_vectors_udf(
17581724 deleted_ids = StdVector_u64 (updated_ids ),
17591725 centroids_uri = centroids_uri ,
17601726 parts_uri = partial_write_array_parts_uri ,
1761- index_array_uri = partial_write_array_index_tmp_uri ,
1762- # index_array_uri=partial_write_array_index_uri,
1727+ index_array_uri = partial_write_array_index_uri ,
17631728 id_uri = partial_write_array_ids_uri ,
17641729 start = part ,
17651730 end = part_end ,
1766- # partition_start=part_id * (partitions + 1),
1731+ partition_start = part_id * (partitions + 1 ),
17671732 nthreads = threads ,
17681733 ** (
17691734 {"timestamp" : index_timestamp }
@@ -1801,12 +1766,11 @@ def ingest_vectors_udf(
18011766 deleted_ids = StdVector_u64 (updated_ids ),
18021767 centroids_uri = centroids_uri ,
18031768 parts_uri = partial_write_array_parts_uri ,
1804- index_array_uri = partial_write_array_index_tmp_uri ,
1805- # index_array_uri=partial_write_array_index_uri,
1769+ index_array_uri = partial_write_array_index_uri ,
18061770 id_uri = partial_write_array_ids_uri ,
18071771 start = part ,
18081772 end = part_end ,
1809- # partition_start=part_id * (partitions + 1),
1773+ partition_start = part_id * (partitions + 1 ),
18101774 nthreads = threads ,
18111775 ** (
18121776 {"timestamp" : index_timestamp }
@@ -1816,15 +1780,6 @@ def ingest_vectors_udf(
18161780 config = config ,
18171781 )
18181782
1819- # Temporary solution until `ivf_index` library change gets released.
1820- # TODO(nikos) remove this when the `partition_start` parameter of `ivf_index` gets released.
1821- with tiledb .open (partial_write_array_index_tmp_uri ) as a :
1822- partial_write_array_index_array [
1823- partition_start : partition_start + partitions + 1
1824- ] = a [0 : partitions + 1 ]
1825- tiledb .Array .delete_array (partial_write_array_index_tmp_uri )
1826- partial_write_array_index_array .close ()
1827-
18281783 def ingest_additions_udf (
18291784 index_group_uri : str ,
18301785 updates_uri : str ,
@@ -1837,10 +1792,6 @@ def ingest_additions_udf(
18371792 verbose : bool = False ,
18381793 trace_id : Optional [str ] = None ,
18391794 ):
1840- import os
1841- import random
1842- import tempfile
1843-
18441795 import tiledb .cloud
18451796 from tiledb .vector_search .module import StdVector_u64
18461797 from tiledb .vector_search .module import array_to_matrix
@@ -1855,30 +1806,6 @@ def ingest_additions_udf(
18551806 partial_write_array_parts_uri = partial_write_array_group [PARTS_ARRAY_NAME ].uri
18561807 partial_write_array_index_uri = partial_write_array_group [INDEX_ARRAY_NAME ].uri
18571808
1858- # Temporary solution until `ivf_index` library change gets released. We create a local disk
1859- # temporary array to hold the partial indices and write them to the respective range in the main array.
1860- # TODO(nikos) remove this when the `partition_start` parameter of `ivf_index` gets released.
1861- partial_write_array_index_tmp_uri = os .path .join (
1862- tempfile .gettempdir (), f"{ random .randint (0 ,MAX_INT32 )} _{ partition_start } "
1863- )
1864- index_array_rows_dim = tiledb .Dim (
1865- name = "rows" ,
1866- domain = (0 , MAX_INT32 ),
1867- tile = 100000 ,
1868- dtype = np .dtype (np .int32 ),
1869- )
1870- index_array_dom = tiledb .Domain (index_array_rows_dim )
1871- index_attr = tiledb .Attr (
1872- name = "values" ,
1873- dtype = np .dtype (np .uint64 ),
1874- )
1875- index_schema = tiledb .ArraySchema (
1876- domain = index_array_dom ,
1877- sparse = False ,
1878- attrs = [index_attr ],
1879- )
1880- tiledb .Array .create (partial_write_array_index_tmp_uri , index_schema )
1881-
18821809 additions_vectors , additions_external_ids = read_additions (
18831810 updates_uri = updates_uri ,
18841811 config = config ,
@@ -1904,29 +1831,16 @@ def ingest_additions_udf(
19041831 deleted_ids = StdVector_u64 (np .array ([], np .uint64 )),
19051832 centroids_uri = centroids_uri ,
19061833 parts_uri = partial_write_array_parts_uri ,
1907- index_array_uri = partial_write_array_index_tmp_uri ,
1908- # index_array_uri=partial_write_array_index_uri,
1834+ index_array_uri = partial_write_array_index_uri ,
19091835 id_uri = partial_write_array_ids_uri ,
19101836 start = write_offset ,
19111837 end = 0 ,
1912- # partition_start=partition_start,
1838+ partition_start = partition_start ,
19131839 nthreads = threads ,
19141840 ** ({"timestamp" : index_timestamp } if index_timestamp is not None else {}),
19151841 config = config ,
19161842 )
19171843
1918- # Temporary solution until `ivf_index` library change gets released.
1919- # TODO(nikos) remove this when the `partition_start` parameter of `ivf_index` gets released.
1920- partial_write_array_index_array = tiledb .open (
1921- partial_write_array_index_uri , "w" , timestamp = index_timestamp
1922- )
1923- with tiledb .open (partial_write_array_index_tmp_uri ) as a :
1924- partial_write_array_index_array [
1925- partition_start : partition_start + partitions + 1
1926- ] = a [0 : partitions + 1 ]
1927- tiledb .Array .delete_array (partial_write_array_index_tmp_uri )
1928- partial_write_array_index_array .close ()
1929-
19301844 def compute_partition_indexes_udf (
19311845 index_group_uri : str ,
19321846 partitions : int ,
0 commit comments