Skip to content

Commit 1250a76

Browse files
committed
Fix library to work with new names
The index was renamed from Timescale Vector to diskann within vectorscale.
1 parent 659956d commit 1250a76

File tree

7 files changed

+307
-188
lines changed

7 files changed

+307
-188
lines changed

README.md

Lines changed: 86 additions & 49 deletions
Large diffs are not rendered by default.

nbs/00_vector.ipynb

Lines changed: 63 additions & 54 deletions
Large diffs are not rendered by default.

nbs/01_pgvectorizer.ipynb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -226,7 +226,7 @@
226226
"from langchain.text_splitter import CharacterTextSplitter\n",
227227
"from timescale_vector import client\n",
228228
"from langchain_openai import OpenAIEmbeddings\n",
229-
"from langchain.vectorstores.timescalevector import TimescaleVector\n",
229+
"from langchain_community.vectorstores.timescalevector import TimescaleVector\n",
230230
"from datetime import timedelta"
231231
]
232232
},

nbs/index.ipynb

Lines changed: 112 additions & 47 deletions
Large diffs are not rendered by default.

nbs/tsv_python_getting_started_tutorial.ipynb

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
"cell_type": "markdown",
1414
"metadata": {},
1515
"source": [
16-
"This notebook shows how to use the PostgreSQL vector database `Timescale Vector` via the [Timescale Vector python client library](https://github.com/timescale/python-vector). You'll learn how to use TimescaleVector for (1) semantic search, (2) time-based vector search, (3) and how to create indexes to speed up queries.\n",
16+
"This notebook shows how to use PostgreSQL as a vector database via the [Python Vector client library](https://github.com/timescale/python-vector). You'll learn how to use the client for (1) semantic search, (2) time-based vector search, and (3) how to create indexes to speed up queries.\n",
1717
"\n",
1818
"Follow along by downloading the [Jupyter notebook version of this tutorial here](https://github.com/timescale/python-vector/blob/main/nbs/tsv_python_getting_started_tutorial.ipynb).\n",
1919
"\n",
@@ -914,7 +914,7 @@
914914
"\n",
915915
"Important note: In PostgreSQL, each table can only have one index on a particular column. So if you'd like to test the performance of different index types, you can do so either by (1) creating multiple tables with different indexes, (2) creating multiple vector columns in the same table and creating different indexes on each column, or (3) by dropping and recreating the index on the same column and comparing results.\n",
916916
"\n",
917-
"Let's look at how to create each type of index in Timescale Vector, starting with the TimescaleVector (DiskANN) index."
917+
"Let's look at how to create each type of index, starting with the StreamingDiskANN index."
918918
]
919919
},
920920
{
@@ -924,7 +924,7 @@
924924
"outputs": [],
925925
"source": [
926926
"# Create a timescale vector (DiskANN) search index on the embedding column\n",
927-
"await vec.create_embedding_index(client.TimescaleVectorIndex())"
927+
"await vec.create_embedding_index(client.DiskAnnIndex())"
928928
]
929929
},
930930
{
@@ -974,7 +974,7 @@
974974
"outputs": [],
975975
"source": [
976976
"await vec.drop_embedding_index()\n",
977-
"await vec.create_embedding_index(client.TimescaleVectorIndex())"
977+
"await vec.create_embedding_index(client.DiskAnnIndex())"
978978
]
979979
},
980980
{
@@ -993,13 +993,13 @@
993993
"- Finding the most recent embeddings that are similar to a query vector (e.g., recent news).\n",
994994
"- Constraining similarity search to a relevant time range (e.g., asking time-based questions about a knowledge base).\n",
995995
"\n",
996-
"Let's look at how to run similarity searches with time range filters using the TimescaleVector client.\n",
996+
"Let's look at how to run similarity searches with time range filters using the client.\n",
997997
"\n",
998998
"- The first step to using time filtering with Timescale Vector is to create a table with the `time_partition_interval` argument set to the desired time interval. This will automatically partition the table into time-based chunks to speed up queries. We completed this step in Part 1 above.\n",
999999
"\n",
10001000
"- Next, we ensure the `id` of our row is a `uuid` with a datetime portion that reflects the date and time we want to associate with the embedding. We completed this step in Part 2 above, where we used the `uuid_from_time()` method provided by the Timescale Vector library.\n",
10011001
"\n",
1002-
"- Finally, we can run similarity searches with time range filters using the TimescaleVector client. We'll illustrate this below."
1002+
"- Finally, we can run similarity searches with time range filters using the client. We'll illustrate this below."
10031003
]
10041004
},
10051005
{

timescale_vector/_modidx.py

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,15 @@
4242
'timescale_vector/client.py'),
4343
'timescale_vector.client.BaseIndex.get_index_method': ( 'vector.html#baseindex.get_index_method',
4444
'timescale_vector/client.py'),
45+
'timescale_vector.client.DiskAnnIndex': ('vector.html#diskannindex', 'timescale_vector/client.py'),
46+
'timescale_vector.client.DiskAnnIndex.__init__': ( 'vector.html#diskannindex.__init__',
47+
'timescale_vector/client.py'),
48+
'timescale_vector.client.DiskAnnIndex.create_index_query': ( 'vector.html#diskannindex.create_index_query',
49+
'timescale_vector/client.py'),
50+
'timescale_vector.client.DiskAnnIndexParams': ( 'vector.html#diskannindexparams',
51+
'timescale_vector/client.py'),
52+
'timescale_vector.client.DiskAnnIndexParams.__init__': ( 'vector.html#diskannindexparams.__init__',
53+
'timescale_vector/client.py'),
4554
'timescale_vector.client.HNSWIndex': ('vector.html#hnswindex', 'timescale_vector/client.py'),
4655
'timescale_vector.client.HNSWIndex.__init__': ( 'vector.html#hnswindex.__init__',
4756
'timescale_vector/client.py'),
@@ -152,16 +161,6 @@
152161
'timescale_vector.client.Sync.table_is_empty': ( 'vector.html#sync.table_is_empty',
153162
'timescale_vector/client.py'),
154163
'timescale_vector.client.Sync.upsert': ('vector.html#sync.upsert', 'timescale_vector/client.py'),
155-
'timescale_vector.client.TimescaleVectorIndex': ( 'vector.html#timescalevectorindex',
156-
'timescale_vector/client.py'),
157-
'timescale_vector.client.TimescaleVectorIndex.__init__': ( 'vector.html#timescalevectorindex.__init__',
158-
'timescale_vector/client.py'),
159-
'timescale_vector.client.TimescaleVectorIndex.create_index_query': ( 'vector.html#timescalevectorindex.create_index_query',
160-
'timescale_vector/client.py'),
161-
'timescale_vector.client.TimescaleVectorIndexParams': ( 'vector.html#timescalevectorindexparams',
162-
'timescale_vector/client.py'),
163-
'timescale_vector.client.TimescaleVectorIndexParams.__init__': ( 'vector.html#timescalevectorindexparams.__init__',
164-
'timescale_vector/client.py'),
165164
'timescale_vector.client.UUIDTimeRange': ( 'vector.html#uuidtimerange',
166165
'timescale_vector/client.py'),
167166
'timescale_vector.client.UUIDTimeRange.__init__': ( 'vector.html#uuidtimerange.__init__',

timescale_vector/client.py

Lines changed: 30 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,9 @@
22

33
# %% auto 0
44
__all__ = ['SEARCH_RESULT_ID_IDX', 'SEARCH_RESULT_METADATA_IDX', 'SEARCH_RESULT_CONTENTS_IDX', 'SEARCH_RESULT_EMBEDDING_IDX',
5-
'SEARCH_RESULT_DISTANCE_IDX', 'uuid_from_time', 'BaseIndex', 'IvfflatIndex', 'HNSWIndex',
6-
'TimescaleVectorIndex', 'QueryParams', 'TimescaleVectorIndexParams', 'IvfflatIndexParams', 'HNSWIndexParams',
7-
'UUIDTimeRange', 'Predicates', 'QueryBuilder', 'Async', 'Sync']
5+
'SEARCH_RESULT_DISTANCE_IDX', 'uuid_from_time', 'BaseIndex', 'IvfflatIndex', 'HNSWIndex', 'DiskAnnIndex',
6+
'QueryParams', 'DiskAnnIndexParams', 'IvfflatIndexParams', 'HNSWIndexParams', 'UUIDTimeRange', 'Predicates',
7+
'QueryBuilder', 'Async', 'Sync']
88

99
# %% ../nbs/00_vector.ipynb 5
1010
import asyncpg
@@ -153,44 +153,48 @@ def create_index_query(self, table_name_quoted:str, column_name_quoted: str, ind
153153
return "CREATE INDEX {index_name} ON {table_name} USING hnsw ({column_name} {index_method}) {with_clause};"\
154154
.format(index_name=index_name_quoted, table_name=table_name_quoted, column_name=column_name_quoted, index_method=index_method, with_clause=with_clause)
155155

156-
class TimescaleVectorIndex(BaseIndex):
156+
class DiskAnnIndex(BaseIndex):
157157
def __init__(self,
158-
use_pq: Optional[bool] = None,
159-
num_neighbors: Optional[int] = None,
160158
search_list_size: Optional[int] = None,
159+
num_neighbors: Optional[int] = None,
161160
max_alpha: Optional[float] = None,
162-
pq_vector_length: Optional[int] = None,
161+
storage_layout: Optional[str] = None,
162+
num_dimensions: Optional[int] = None,
163+
num_bits_per_dimension: Optional[int] = None,
163164
) -> None:
164165
"""
165166
Timescale's vector index.
166167
"""
167-
self.use_pq = use_pq
168-
self.num_neighbors = num_neighbors
169168
self.search_list_size = search_list_size
169+
self.num_neighbors = num_neighbors
170170
self.max_alpha = max_alpha
171-
self.pq_vector_length = pq_vector_length
171+
self.storage_layout = storage_layout
172+
self.num_dimensions = num_dimensions
173+
self.num_bits_per_dimension = num_bits_per_dimension
172174

173175
def create_index_query(self, table_name_quoted:str, column_name_quoted: str, index_name_quoted: str, distance_type: str, num_records_callback: Callable[[], int]) -> str:
174176
if distance_type != "<=>":
175177
raise ValueError(f"Timescale's vector index only supports cosine distance, but distance_type was {distance_type}")
176178

177179
with_clauses = []
178-
if self.use_pq is not None:
179-
with_clauses.append(f"use_pq = {self.use_pq}")
180-
if self.num_neighbors is not None:
181-
with_clauses.append(f"num_neighbors = {self.num_neighbors}")
182180
if self.search_list_size is not None:
183181
with_clauses.append(f"search_list_size = {self.search_list_size}")
182+
if self.num_neighbors is not None:
183+
with_clauses.append(f"num_neighbors = {self.num_neighbors}")
184184
if self.max_alpha is not None:
185185
with_clauses.append(f"max_alpha = {self.max_alpha}")
186-
if self.pq_vector_length is not None:
187-
with_clauses.append(f"pq_vector_length = {self.pq_vector_length}")
186+
if self.storage_layout is not None:
187+
with_clauses.append(f"storage_layout = {self.storage_layout}")
188+
if self.num_dimensions is not None:
189+
with_clauses.append(f"num_dimensions = {self.num_dimensions}")
190+
if self.num_bits_per_dimension is not None:
191+
with_clauses.append(f"num_bits_per_dimension = {self.num_bits_per_dimension}")
188192

189193
with_clause = ""
190194
if len(with_clauses) > 0:
191195
with_clause = "WITH (" + ", ".join(with_clauses) + ")"
192196

193-
return "CREATE INDEX {index_name} ON {table_name} USING tsv ({column_name}) {with_clause};"\
197+
return "CREATE INDEX {index_name} ON {table_name} USING diskann ({column_name}) {with_clause};"\
194198
.format(index_name=index_name_quoted, table_name=table_name_quoted, column_name=column_name_quoted, with_clause=with_clause)
195199

196200

@@ -202,9 +206,14 @@ def __init__(self, params: dict[str, Any]) -> None:
202206
def get_statements(self) -> List[str]:
203207
return ["SET LOCAL " + key + " = " + str(value) for key, value in self.params.items()]
204208

205-
class TimescaleVectorIndexParams(QueryParams):
206-
def __init__(self, search_list_size: int) -> None:
207-
super().__init__({"tsv.query_search_list_size": search_list_size})
209+
class DiskAnnIndexParams(QueryParams):
210+
def __init__(self, search_list_size: Optional[int] = None, rescore: Optional[int] = None) -> None:
211+
params = {}
212+
if search_list_size is not None:
213+
params["diskann.query_search_list_size"] = search_list_size
214+
if rescore is not None:
215+
params["diskann.query_rescore"] = rescore
216+
super().__init__(params)
208217

209218
class IvfflatIndexParams(QueryParams):
210219
def __init__(self, probes: int) -> None:
@@ -602,7 +611,7 @@ def get_create_query(self):
602611
)
603612
return '''
604613
CREATE EXTENSION IF NOT EXISTS vector;
605-
CREATE EXTENSION IF NOT EXISTS timescale_vector;
614+
CREATE EXTENSION IF NOT EXISTS vectorscale;
606615
607616
608617
CREATE TABLE IF NOT EXISTS {table_name} (

0 commit comments

Comments
 (0)