Skip to content

Commit a892636

Browse files
committed
Allow setting an explicit uuid time; allow querying by time
1 parent 7976261 commit a892636

File tree

3 files changed

+314
-21
lines changed

3 files changed

+314
-21
lines changed

nbs/00_vector.ipynb

Lines changed: 185 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,10 @@
7272
"import json\n",
7373
"import numpy as np\n",
7474
"import math\n",
75-
"from datetime import timedelta"
75+
"import random\n",
76+
"from datetime import timedelta\n",
77+
"from datetime import datetime\n",
78+
"import calendar"
7679
]
7780
},
7881
{
@@ -90,6 +93,109 @@
9093
"SEARCH_RESULT_DISTANCE_IDX = 4"
9194
]
9295
},
96+
{
97+
"cell_type": "code",
98+
"execution_count": null,
99+
"metadata": {},
100+
"outputs": [],
101+
"source": [
102+
"#| export\n",
103+
"\n",
104+
"#copied from Cassandra: https://docs.datastax.com/en/drivers/python/3.2/_modules/cassandra/util.html#uuid_from_time\n",
105+
"def uuid_from_time(time_arg=None, node=None, clock_seq=None):\n",
106+
" if time_arg is None:\n",
107+
" return uuid.uuid1(node, clock_seq)\n",
108+
" \"\"\"\n",
109+
" Converts a datetime or timestamp to a type 1 :class:`uuid.UUID`.\n",
110+
"\n",
111+
" :param time_arg:\n",
112+
" The time to use for the timestamp portion of the UUID.\n",
113+
" This can either be a :class:`datetime` object or a timestamp\n",
114+
" in seconds (as returned from :meth:`time.time()`).\n",
115+
" :type datetime: :class:`datetime` or timestamp\n",
116+
"\n",
117+
" :param node:\n",
118+
" None integer for the UUID (up to 48 bits). If not specified, this\n",
119+
" field is randomized.\n",
120+
" :type node: long\n",
121+
"\n",
122+
" :param clock_seq:\n",
123+
" Clock sequence field for the UUID (up to 14 bits). If not specified,\n",
124+
" a random sequence is generated.\n",
125+
" :type clock_seq: int\n",
126+
"\n",
127+
" :rtype: :class:`uuid.UUID`\n",
128+
"\n",
129+
" \"\"\"\n",
130+
" if hasattr(time_arg, 'utctimetuple'):\n",
131+
" seconds = int(calendar.timegm(time_arg.utctimetuple()))\n",
132+
" microseconds = (seconds * 1e6) + time_arg.time().microsecond\n",
133+
" else:\n",
134+
" microseconds = int(time_arg * 1e6)\n",
135+
"\n",
136+
" # 0x01b21dd213814000 is the number of 100-ns intervals between the\n",
137+
" # UUID epoch 1582-10-15 00:00:00 and the Unix epoch 1970-01-01 00:00:00.\n",
138+
" intervals = int(microseconds * 10) + 0x01b21dd213814000\n",
139+
"\n",
140+
" time_low = intervals & 0xffffffff\n",
141+
" time_mid = (intervals >> 32) & 0xffff\n",
142+
" time_hi_version = (intervals >> 48) & 0x0fff\n",
143+
"\n",
144+
" if clock_seq is None:\n",
145+
" clock_seq = random.getrandbits(14)\n",
146+
" else:\n",
147+
" if clock_seq > 0x3fff:\n",
148+
" raise ValueError('clock_seq is out of range (need a 14-bit value)')\n",
149+
"\n",
150+
" clock_seq_low = clock_seq & 0xff\n",
151+
" clock_seq_hi_variant = 0x80 | ((clock_seq >> 8) & 0x3f)\n",
152+
"\n",
153+
" if node is None:\n",
154+
" node = random.getrandbits(48)\n",
155+
"\n",
156+
" return uuid.UUID(fields=(time_low, time_mid, time_hi_version,\n",
157+
" clock_seq_hi_variant, clock_seq_low, node), version=1)"
158+
]
159+
},
160+
{
161+
"cell_type": "code",
162+
"execution_count": null,
163+
"metadata": {},
164+
"outputs": [],
165+
"source": [
166+
"#| export\n",
class UUIDTimeRange:
    """
    A time range used to filter rows by ``uuid_timestamp(id)``.

    At least one of ``start_date`` / ``end_date`` must be given; a missing
    bound leaves that side of the range open. By default the start bound is
    inclusive and the end bound is exclusive.
    """

    def __init__(self, start_date: Optional[datetime] = None, end_date: Optional[datetime] = None, start_inclusive: bool = True, end_inclusive: bool = False):
        """
        :param start_date: lower bound of the range, or None for no lower bound.
        :param end_date: upper bound of the range, or None for no upper bound.
        :param start_inclusive: compare the lower bound with ``>=`` instead of ``>``.
        :param end_inclusive: compare the upper bound with ``<=`` instead of ``<``.
        :raises ValueError: if both bounds are None, or start_date is after end_date.
        """
        if start_date is None and end_date is None:
            raise ValueError("start_date and end_date cannot both be None")

        if start_date is not None and end_date is not None and start_date > end_date:
            raise ValueError("start_date must be before end_date")

        self.start_date = start_date
        self.end_date = end_date
        self.start_inclusive = start_inclusive
        self.end_inclusive = end_inclusive

    def build_query(self, params: List) -> Tuple[str, List]:
        """
        Build the WHERE-clause fragment for this range.

        Each present bound becomes one comparison against a ``$N`` positional
        placeholder, numbered after the entries already in ``params``; the
        bound value is appended to ``params`` in place.

        :param params: query parameters collected so far (mutated in place).
        :return: the comparisons joined with " AND ", and the same ``params`` list.
        """
        column = "uuid_timestamp(id)"
        bounds = (
            (self.start_date, ">=" if self.start_inclusive else ">"),
            (self.end_date, "<=" if self.end_inclusive else "<"),
        )
        queries = []
        for value, operator in bounds:
            if value is None:
                continue
            # The placeholder index is 1-based and must be computed before
            # the value is appended.
            queries.append(f"{column} {operator} ${len(params)+1}")
            params.append(value)
        return " AND ".join(queries), params
197+
]
198+
},
93199
{
94200
"cell_type": "code",
95201
"execution_count": null,
@@ -461,7 +567,14 @@
461567
"\n",
462568
" return (where, params)\n",
463569
"\n",
464-
" def search_query(self, query_embedding: Optional[Union[List[float], np.ndarray]], limit: int = 10, filter: Optional[Union[Dict[str, str], List[Dict[str, str]]]] = None, predicates: Optional[Predicates] = None) -> Tuple[str, List]:\n",
570+
" def search_query(\n",
571+
" self, \n",
572+
" query_embedding: Optional[Union[List[float], np.ndarray]], \n",
573+
" limit: int = 10, \n",
574+
" filter: Optional[Union[Dict[str, str], List[Dict[str, str]]]] = None, \n",
575+
" predicates: Optional[Predicates] = None,\n",
576+
" uuid_time_filter: Optional[UUIDTimeRange] = None,\n",
577+
" ) -> Tuple[str, List]:\n",
465578
" \"\"\"\n",
466579
" Generates a similarity query.\n",
467580
"\n",
@@ -487,6 +600,13 @@
487600
" if predicates is not None:\n",
488601
" (where_predicates, params) = predicates.build_query(params)\n",
489602
" where_clauses.append(where_predicates)\n",
603+
"\n",
604+
" if uuid_time_filter is not None:\n",
605+
" if self.time_partition_interval is None:\n",
606+
" raise ValueError(\"\"\"uuid_time_filter is only supported when time_partitioning is enabled.\"\"\")\n",
607+
" \n",
608+
" (where_time, params) = uuid_time_filter.build_query(params)\n",
609+
" where_clauses.append(where_time)\n",
490610
" \n",
491611
" if len(where_clauses) > 0:\n",
492612
" where = \" AND \".join(where_clauses)\n",
@@ -784,7 +904,9 @@
784904
" query_embedding: Optional[List[float]] = None, \n",
785905
" limit: int = 10,\n",
786906
" filter: Optional[Union[Dict[str, str], List[Dict[str, str]]]] = None,\n",
787-
" predicates: Optional[Predicates] = None): \n",
907+
" predicates: Optional[Predicates] = None,\n",
908+
" uuid_time_filter: Optional[UUIDTimeRange] = None,\n",
909+
" ): \n",
788910
" \"\"\"\n",
789911
" Retrieves similar records using a similarity query.\n",
790912
"\n",
@@ -804,7 +926,7 @@
804926
" List: List of similar records.\n",
805927
" \"\"\"\n",
806928
" (query, params) = self.builder.search_query(\n",
807-
" query_embedding, limit, filter, predicates)\n",
929+
" query_embedding, limit, filter, predicates, uuid_time_filter)\n",
808930
" async with await self.connect() as pool:\n",
809931
" return await pool.fetch(query, *params)"
810932
]
@@ -892,6 +1014,18 @@
8921014
"execution_count": null,
8931015
"metadata": {},
8941016
"outputs": [
1017+
{
1018+
"name": "stderr",
1019+
"output_type": "stream",
1020+
"text": [
1021+
"/Users/cevian/.pyenv/versions/3.11.4/envs/nbdev_env/lib/python3.11/site-packages/fastcore/docscrape.py:225: UserWarning: potentially wrong underline length... \n",
1022+
"Returns \n",
1023+
"-------- in \n",
1024+
"Retrieves similar records using a similarity query.\n",
1025+
"...\n",
1026+
" else: warn(msg)\n"
1027+
]
1028+
},
8951029
{
8961030
"data": {
8971031
"text/markdown": [
@@ -903,7 +1037,8 @@
9031037
"\n",
9041038
"> Async.search (query_embedding:Optional[List[float]]=None, limit:int=10,\n",
9051039
"> filter:Union[Dict[str,str],List[Dict[str,str]],NoneType]=No\n",
906-
"> ne, predicates:Optional[__main__.Predicates]=None)\n",
1040+
"> ne, predicates:Optional[__main__.Predicates]=None,\n",
1041+
"> uuid_time_filter:Optional[__main__.UUIDTimeRange]=None)\n",
9071042
"\n",
9081043
"Retrieves similar records using a similarity query.\n",
9091044
"\n",
@@ -913,6 +1048,7 @@
9131048
"| limit | int | 10 | The number of nearest neighbors to retrieve. |\n",
9141049
"| filter | Union | None | A filter for metadata. Should be specified as a key-value object or a list of key-value objects (where any objects in the list are matched). |\n",
9151050
"| predicates | Optional | None | A Predicates object to filter the results. Predicates support more complex queries than the filter parameter. Predicates can be combined using logical operators (&, \\|, and ~). |\n",
1051+
"| uuid_time_filter | Optional | None | |\n",
9161052
"| **Returns** | **List: List of similar records.** | | |"
9171053
],
9181054
"text/plain": [
@@ -924,7 +1060,8 @@
9241060
"\n",
9251061
"> Async.search (query_embedding:Optional[List[float]]=None, limit:int=10,\n",
9261062
"> filter:Union[Dict[str,str],List[Dict[str,str]],NoneType]=No\n",
927-
"> ne, predicates:Optional[__main__.Predicates]=None)\n",
1063+
"> ne, predicates:Optional[__main__.Predicates]=None,\n",
1064+
"> uuid_time_filter:Optional[__main__.UUIDTimeRange]=None)\n",
9281065
"\n",
9291066
"Retrieves similar records using a similarity query.\n",
9301067
"\n",
@@ -934,6 +1071,7 @@
9341071
"| limit | int | 10 | The number of nearest neighbors to retrieve. |\n",
9351072
"| filter | Union | None | A filter for metadata. Should be specified as a key-value object or a list of key-value objects (where any objects in the list are matched). |\n",
9361073
"| predicates | Optional | None | A Predicates object to filter the results. Predicates support more complex queries than the filter parameter. Predicates can be combined using logical operators (&, \\|, and ~). |\n",
1074+
"| uuid_time_filter | Optional | None | |\n",
9371075
"| **Returns** | **List: List of similar records.** | | |"
9381076
]
9391077
},
@@ -1140,6 +1278,21 @@
11401278
" raised = True\n",
11411279
"assert raised\n",
11421280
"\n",
1281+
"import datetime\n",
1282+
"specific_datetime = datetime.datetime(2018, 8, 10, 15, 30, 0)\n",
1283+
"await vec.upsert([\n",
1284+
" # current time\n",
1285+
" (uuid.uuid1(), {\"key\": \"val\"}, \"the brown fox\", [1.0, 1.2]),\n",
1286+
" #time in 2018\n",
1287+
" (uuid_from_time(specific_datetime), {\"key\": \"val\"}, \"the brown fox\", [1.0, 1.2])\n",
1288+
"])\n",
1289+
"assert not await vec.table_is_empty()\n",
1290+
"rec = await vec.search([1.0, 2.0], limit=4, uuid_time_filter=UUIDTimeRange(specific_datetime-timedelta(days=7), specific_datetime+timedelta(days=7)))\n",
1291+
"assert len(rec) == 1\n",
1292+
"rec = await vec.search([1.0, 2.0], limit=4, uuid_time_filter=UUIDTimeRange(specific_datetime-timedelta(days=7), specific_datetime-timedelta(days=2)))\n",
1293+
"assert len(rec) == 0\n",
1294+
"rec = await vec.search([1.0, 2.0], limit=4, uuid_time_filter=UUIDTimeRange(specific_datetime-timedelta(days=7)))\n",
1295+
"assert len(rec) == 2\n",
11431296
"await vec.drop_table()\n",
11441297
"await vec.close()"
11451298
]
@@ -1458,7 +1611,9 @@
14581611
" query_embedding: Optional[List[float]] = None, \n",
14591612
" limit: int = 10, \n",
14601613
" filter: Optional[Union[Dict[str, str], List[Dict[str, str]]]] = None,\n",
1461-
" predicates: Optional[Predicates] = None):\n",
1614+
" predicates: Optional[Predicates] = None,\n",
1615+
" uuid_time_filter: Optional[UUIDTimeRange] = None,\n",
1616+
" ):\n",
14621617
" \"\"\"\n",
14631618
" Retrieves similar records using a similarity query.\n",
14641619
"\n",
@@ -1483,7 +1638,7 @@
14831638
" query_embedding_np = None\n",
14841639
"\n",
14851640
" (query, params) = self.builder.search_query(\n",
1486-
" query_embedding_np, limit, filter, predicates)\n",
1641+
" query_embedding_np, limit, filter, predicates, uuid_time_filter)\n",
14871642
" query, params = self._translate_to_pyformat(query, params)\n",
14881643
" with self.connect() as conn:\n",
14891644
" with conn.cursor() as cur:\n",
@@ -1595,7 +1750,8 @@
15951750
"\n",
15961751
"> Sync.search (query_embedding:Optional[List[float]]=None, limit:int=10,\n",
15971752
"> filter:Union[Dict[str,str],List[Dict[str,str]],NoneType]=Non\n",
1598-
"> e, predicates:Optional[__main__.Predicates]=None)\n",
1753+
"> e, predicates:Optional[__main__.Predicates]=None,\n",
1754+
"> uuid_time_filter:Optional[__main__.UUIDTimeRange]=None)\n",
15991755
"\n",
16001756
"Retrieves similar records using a similarity query.\n",
16011757
"\n",
@@ -1605,6 +1761,7 @@
16051761
"| limit | int | 10 | The number of nearest neighbors to retrieve. |\n",
16061762
"| filter | Union | None | A filter for metadata. Should be specified as a key-value object or a list of key-value objects (where any objects in the list are matched). |\n",
16071763
"| predicates | Optional | None | A Predicates object to filter the results. Predicates support more complex queries than the filter parameter. Predicates can be combined using logical operators (&, \\|, and ~). |\n",
1764+
"| uuid_time_filter | Optional | None | |\n",
16081765
"| **Returns** | **List: List of similar records.** | | |"
16091766
],
16101767
"text/plain": [
@@ -1616,7 +1773,8 @@
16161773
"\n",
16171774
"> Sync.search (query_embedding:Optional[List[float]]=None, limit:int=10,\n",
16181775
"> filter:Union[Dict[str,str],List[Dict[str,str]],NoneType]=Non\n",
1619-
"> e, predicates:Optional[__main__.Predicates]=None)\n",
1776+
"> e, predicates:Optional[__main__.Predicates]=None,\n",
1777+
"> uuid_time_filter:Optional[__main__.UUIDTimeRange]=None)\n",
16201778
"\n",
16211779
"Retrieves similar records using a similarity query.\n",
16221780
"\n",
@@ -1626,6 +1784,7 @@
16261784
"| limit | int | 10 | The number of nearest neighbors to retrieve. |\n",
16271785
"| filter | Union | None | A filter for metadata. Should be specified as a key-value object or a list of key-value objects (where any objects in the list are matched). |\n",
16281786
"| predicates | Optional | None | A Predicates object to filter the results. Predicates support more complex queries than the filter parameter. Predicates can be combined using logical operators (&, \\|, and ~). |\n",
1787+
"| uuid_time_filter | Optional | None | |\n",
16291788
"| **Returns** | **List: List of similar records.** | | |"
16301789
]
16311790
},
@@ -1809,6 +1968,22 @@
18091968
"except BaseException as e:\n",
18101969
" raised = True\n",
18111970
"assert raised\n",
1971+
"\n",
1972+
"import datetime\n",
1973+
"specific_datetime = datetime.datetime(2018, 8, 10, 15, 30, 0)\n",
1974+
"vec.upsert([\n",
1975+
" # current time\n",
1976+
" (uuid.uuid1(), {\"key\": \"val\"}, \"the brown fox\", [1.0, 1.2]),\n",
1977+
" #time in 2018\n",
1978+
" (uuid_from_time(specific_datetime), {\"key\": \"val\"}, \"the brown fox\", [1.0, 1.2])\n",
1979+
"])\n",
1980+
"assert not vec.table_is_empty()\n",
1981+
"rec = vec.search([1.0, 2.0], limit=4, uuid_time_filter=UUIDTimeRange(specific_datetime-timedelta(days=7), specific_datetime+timedelta(days=7)))\n",
1982+
"assert len(rec) == 1\n",
1983+
"rec = vec.search([1.0, 2.0], limit=4, uuid_time_filter=UUIDTimeRange(specific_datetime-timedelta(days=7), specific_datetime-timedelta(days=2)))\n",
1984+
"assert len(rec) == 0\n",
1985+
"rec = vec.search([1.0, 2.0], limit=4, uuid_time_filter=UUIDTimeRange(specific_datetime-timedelta(days=7)))\n",
1986+
"assert len(rec) == 2\n",
18121987
"vec.drop_table()\n",
18131988
"vec.close()"
18141989
]

timescale_vector/_modidx.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -119,4 +119,12 @@
119119
'timescale_vector.client.Sync.search': ('vector.html#sync.search', 'timescale_vector/client.py'),
120120
'timescale_vector.client.Sync.table_is_empty': ( 'vector.html#sync.table_is_empty',
121121
'timescale_vector/client.py'),
122-
'timescale_vector.client.Sync.upsert': ('vector.html#sync.upsert', 'timescale_vector/client.py')}}}
122+
'timescale_vector.client.Sync.upsert': ('vector.html#sync.upsert', 'timescale_vector/client.py'),
123+
'timescale_vector.client.UUIDTimeRange': ( 'vector.html#uuidtimerange',
124+
'timescale_vector/client.py'),
125+
'timescale_vector.client.UUIDTimeRange.__init__': ( 'vector.html#uuidtimerange.__init__',
126+
'timescale_vector/client.py'),
127+
'timescale_vector.client.UUIDTimeRange.build_query': ( 'vector.html#uuidtimerange.build_query',
128+
'timescale_vector/client.py'),
129+
'timescale_vector.client.uuid_from_time': ( 'vector.html#uuid_from_time',
130+
'timescale_vector/client.py')}}}

0 commit comments

Comments
 (0)