Skip to content

Commit bcfcab9

Browse files
committed
Finish time-partitioning testing
1 parent 8ada562 commit bcfcab9

File tree

3 files changed

+147
-49
lines changed

3 files changed

+147
-49
lines changed

nbs/00_vector.ipynb

Lines changed: 108 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -44,18 +44,7 @@
4444
"cell_type": "code",
4545
"execution_count": null,
4646
"metadata": {},
47-
"outputs": [
48-
{
49-
"data": {
50-
"text/plain": [
51-
"'postgres://tsdbadmin:[email protected]:36462/tsdb?sslmode=require'"
52-
]
53-
},
54-
"execution_count": null,
55-
"metadata": {},
56-
"output_type": "execute_result"
57-
}
58-
],
47+
"outputs": [],
5948
"source": [
6049
"_ = load_dotenv(find_dotenv())\n",
6150
"service_url = os.environ['TIMESCALE_SERVICE_URL']"
@@ -79,7 +68,7 @@
7968
"import asyncpg\n",
8069
"import uuid\n",
8170
"from pgvector.asyncpg import register_vector\n",
82-
"from typing import (List, Optional, Union, Dict, Tuple, Any)\n",
71+
"from typing import (List, Optional, Union, Dict, Tuple, Any, Iterable)\n",
8372
"import json\n",
8473
"import numpy as np\n",
8574
"import math\n",
@@ -266,6 +255,9 @@
266255
" if id_type.lower() != 'uuid' and id_type.lower() != 'text':\n",
267256
" raise ValueError(f\"unrecognized id_type {id_type}\")\n",
268257
"\n",
258+
" if time_partition_interval is not None and id_type.lower() != 'uuid':\n",
259+
" raise ValueError(f\"time partitioning is only supported for uuid id_type\")\n",
260+
"\n",
269261
" self.id_type = id_type.lower()\n",
270262
" self.time_partition_interval = time_partition_interval\n",
271263
"\n",
@@ -521,7 +513,7 @@
521513
"text/markdown": [
522514
"---\n",
523515
"\n",
524-
"[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L222){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
516+
"[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L225){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
525517
"\n",
526518
"### QueryBuilder.get_create_query\n",
527519
"\n",
@@ -532,7 +524,7 @@
532524
"text/plain": [
533525
"---\n",
534526
"\n",
535-
"[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L222){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
527+
"[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L225){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
536528
"\n",
537529
"### QueryBuilder.get_create_query\n",
538530
"\n",
@@ -597,6 +589,7 @@
597589
" self.service_url = service_url\n",
598590
" self.pool = None\n",
599591
" self.max_db_connections = max_db_connections\n",
592+
" self.time_partition_interval = time_partition_interval\n",
600593
"\n",
601594
" async def _default_max_db_connections(self) -> int:\n",
602595
" \"\"\"\n",
@@ -653,6 +646,19 @@
653646
" rec = await pool.fetchrow(query)\n",
654647
" return rec == None\n",
655648
"\n",
649+
" def munge_record(self, records) -> Iterable[Tuple[uuid.UUID, str, str, List[float]]]:\n",
650+
" if self.time_partition_interval is not None:\n",
651+
" for record in records:\n",
652+
" id = record[0]\n",
653+
" if id.variant != uuid.RFC_4122 or id.version != 1:\n",
654+
" raise ValueError(\"When using time partitioning, id must be a v1 uuid\")\n",
655+
"\n",
656+
" metadata_is_dict = isinstance(records[0][1], dict)\n",
657+
" if metadata_is_dict:\n",
658+
" records = map(lambda item: Async._convert_record_meta_to_json(item), records)\n",
659+
"\n",
660+
" return records \n",
661+
"\n",
656662
" def _convert_record_meta_to_json(item):\n",
657663
" if not isinstance(item[1], dict):\n",
658664
" raise ValueError(\n",
@@ -672,8 +678,7 @@
672678
" -------\n",
673679
" None\n",
674680
" \"\"\"\n",
675-
" if isinstance(records[0][1], dict):\n",
676-
" records = map(lambda item: Async._convert_record_meta_to_json(item), records)\n",
681+
" records = self.munge_record(records)\n",
677682
" query = self.builder.get_upsert_query()\n",
678683
" async with await self.connect() as pool:\n",
679684
" await pool.executemany(query, records)\n",
@@ -817,7 +822,7 @@
817822
"text/markdown": [
818823
"---\n",
819824
"\n",
820-
"[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L458){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
825+
"[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L533){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
821826
"\n",
822827
"### Async.create_tables\n",
823828
"\n",
@@ -828,7 +833,7 @@
828833
"text/plain": [
829834
"---\n",
830835
"\n",
831-
"[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L458){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
836+
"[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L533){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
832837
"\n",
833838
"### Async.create_tables\n",
834839
"\n",
@@ -856,7 +861,7 @@
856861
"text/markdown": [
857862
"---\n",
858863
"\n",
859-
"[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L458){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
864+
"[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L533){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
860865
"\n",
861866
"### Async.create_tables\n",
862867
"\n",
@@ -867,7 +872,7 @@
867872
"text/plain": [
868873
"---\n",
869874
"\n",
870-
"[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L458){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
875+
"[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L533){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
871876
"\n",
872877
"### Async.create_tables\n",
873878
"\n",
@@ -907,7 +912,7 @@
907912
"text/markdown": [
908913
"---\n",
909914
"\n",
910-
"[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L556){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
915+
"[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L633){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
911916
"\n",
912917
"### Async.search\n",
913918
"\n",
@@ -928,7 +933,7 @@
928933
"text/plain": [
929934
"---\n",
930935
"\n",
931-
"[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L556){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
936+
"[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L633){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
932937
"\n",
933938
"### Async.search\n",
934939
"\n",
@@ -1072,26 +1077,27 @@
10721077
"rec = await vec.search(limit=4, predicates=~Predicates((\"key\", \"val2\"), (\"key_10\", \"<\", 100)))\n",
10731078
"assert len(rec) == 4\n",
10741079
"\n",
1075-
"\n",
1080+
"raised = False\n",
10761081
"try:\n",
10771082
" # can't upsert using both keys and dictionaries\n",
10781083
" await vec.upsert([\n",
10791084
" (uuid.uuid4(), {\"key\": \"val\"}, \"the brown fox\", [1.0, 1.2]),\n",
10801085
" (uuid.uuid4(), '''{\"key2\":\"val\"}''', \"the brown fox\", [1.0, 1.2])\n",
10811086
" ])\n",
1082-
" assert False\n",
10831087
"except ValueError as e:\n",
1084-
" pass\n",
1088+
" raised = True\n",
1089+
"assert raised\n",
10851090
"\n",
1091+
"raised = False\n",
10861092
"try:\n",
10871093
" # can't upsert using both keys and dictionaries opposite order\n",
10881094
" await vec.upsert([\n",
10891095
" (uuid.uuid4(), '''{\"key2\":\"val\"}''', \"the brown fox\", [1.0, 1.2]),\n",
10901096
" (uuid.uuid4(), {\"key\": \"val\"}, \"the brown fox\", [1.0, 1.2])\n",
10911097
" ])\n",
1092-
" assert False\n",
10931098
"except BaseException as e:\n",
1094-
" pass\n",
1099+
" raised = True\n",
1100+
"assert raised\n",
10951101
"\n",
10961102
"rec = await vec.search([1.0, 2.0], limit=4, filter=[{\"key_1\": \"val_1\"}, {\"key2\": \"val2\"}])\n",
10971103
"assert len(rec) == 2\n",
@@ -1125,6 +1131,31 @@
11251131
"empty = await vec.table_is_empty()\n",
11261132
"assert empty\n",
11271133
"await vec.drop_table()\n",
1134+
"await vec.close()\n",
1135+
"\n",
1136+
"vec = Async(service_url, \"data_table\", 2, time_partition_interval=timedelta(seconds=60))\n",
1137+
"await vec.create_tables()\n",
1138+
"empty = await vec.table_is_empty()\n",
1139+
"assert empty\n",
1140+
"id = uuid.uuid1()\n",
1141+
"await vec.upsert([(id, {\"key\": \"val\"}, \"the brown fox\", [1.0, 1.2])])\n",
1142+
"empty = await vec.table_is_empty()\n",
1143+
"assert not empty\n",
1144+
"await vec.delete_by_ids([id])\n",
1145+
"empty = await vec.table_is_empty()\n",
1146+
"assert empty\n",
1147+
"\n",
1148+
"raised = False\n",
1149+
"try:\n",
1150+
" # can't upsert with uuid type 4 in time partitioned table\n",
1151+
" await vec.upsert([\n",
1152+
" (uuid.uuid4(), {\"key\": \"val\"}, \"the brown fox\", [1.0, 1.2])\n",
1153+
" ])\n",
1154+
"except BaseException as e:\n",
1155+
" raised = True\n",
1156+
"assert raised\n",
1157+
"\n",
1158+
"await vec.drop_table()\n",
11281159
"await vec.close()"
11291160
]
11301161
},
@@ -1192,6 +1223,7 @@
11921223
" self.service_url = service_url\n",
11931224
" self.pool = None\n",
11941225
" self.max_db_connections = max_db_connections\n",
1226+
" self.time_partition_interval = time_partition_interval\n",
11951227
" psycopg2.extras.register_uuid()\n",
11961228
"\n",
11971229
" def default_max_db_connections(self):\n",
@@ -1285,6 +1317,20 @@
12851317
" cur.execute(query)\n",
12861318
" rec = cur.fetchone()\n",
12871319
" return rec == None\n",
1320+
" \n",
1321+
" def munge_record(self, records) -> Iterable[Tuple[uuid.UUID, str, str, List[float]]]:\n",
1322+
" if self.time_partition_interval is not None:\n",
1323+
" for record in records:\n",
1324+
" id = record[0]\n",
1325+
" if id.variant != uuid.RFC_4122 or id.version != 1:\n",
1326+
" raise ValueError(\"When using time partitioning, id must be a v1 uuid\")\n",
1327+
"\n",
1328+
" metadata_is_dict = isinstance(records[0][1], dict)\n",
1329+
" if metadata_is_dict:\n",
1330+
" records = map(lambda item: Sync._convert_record_meta_to_json(item), records)\n",
1331+
"\n",
1332+
" return records\n",
1333+
"\n",
12881334
"\n",
12891335
" def _convert_record_meta_to_json(item):\n",
12901336
" if not isinstance(item[1], dict):\n",
@@ -1305,10 +1351,7 @@
13051351
" -------\n",
13061352
" None\n",
13071353
" \"\"\"\n",
1308-
" if isinstance(records[0][1], dict):\n",
1309-
" records = list(\n",
1310-
" map(lambda item: Async._convert_record_meta_to_json(item), records))\n",
1311-
"\n",
1354+
" records = self.munge_record(records)\n",
13121355
" query = self.builder.get_upsert_query()\n",
13131356
" query, _ = self._translate_to_pyformat(query, None)\n",
13141357
" with self.connect() as conn:\n",
@@ -1479,7 +1522,7 @@
14791522
"text/markdown": [
14801523
"---\n",
14811524
"\n",
1482-
"[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L727){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
1525+
"[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L827){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
14831526
"\n",
14841527
"### Sync.create_tables\n",
14851528
"\n",
@@ -1490,7 +1533,7 @@
14901533
"text/plain": [
14911534
"---\n",
14921535
"\n",
1493-
"[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L727){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
1536+
"[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L827){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
14941537
"\n",
14951538
"### Sync.create_tables\n",
14961539
"\n",
@@ -1518,7 +1561,7 @@
15181561
"text/markdown": [
15191562
"---\n",
15201563
"\n",
1521-
"[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L704){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
1564+
"[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L804){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
15221565
"\n",
15231566
"### Sync.upsert\n",
15241567
"\n",
@@ -1534,7 +1577,7 @@
15341577
"text/plain": [
15351578
"---\n",
15361579
"\n",
1537-
"[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L704){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
1580+
"[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L804){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
15381581
"\n",
15391582
"### Sync.upsert\n",
15401583
"\n",
@@ -1567,7 +1610,7 @@
15671610
"text/markdown": [
15681611
"---\n",
15691612
"\n",
1570-
"[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L841){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
1613+
"[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L944){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
15711614
"\n",
15721615
"### Sync.search\n",
15731616
"\n",
@@ -1588,7 +1631,7 @@
15881631
"text/plain": [
15891632
"---\n",
15901633
"\n",
1591-
"[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L841){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
1634+
"[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L944){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
15921635
"\n",
15931636
"### Sync.search\n",
15941637
"\n",
@@ -1703,25 +1746,27 @@
17031746
" \"key2\": \"val2\"}, {\"no such key\": \"no such val\"}])\n",
17041747
"assert len(rec) == 2\n",
17051748
"\n",
1749+
"raised = False\n",
17061750
"try:\n",
17071751
" # can't upsert using both keys and dictionaries\n",
17081752
" await vec.upsert([\n",
17091753
" (uuid.uuid4(), {\"key\": \"val\"}, \"the brown fox\", [1.0, 1.2]),\n",
17101754
" (uuid.uuid4(), '''{\"key2\":\"val\"}''', \"the brown fox\", [1.0, 1.2])\n",
17111755
" ])\n",
1712-
" assert False\n",
17131756
"except ValueError as e:\n",
1714-
" pass\n",
1757+
" raised = True\n",
1758+
"assert raised\n",
17151759
"\n",
1760+
"raised = False\n",
17161761
"try:\n",
17171762
" # can't upsert using both keys and dictionaries opposite order\n",
17181763
" await vec.upsert([\n",
17191764
" (uuid.uuid4(), '''{\"key2\":\"val\"}''', \"the brown fox\", [1.0, 1.2]),\n",
17201765
" (uuid.uuid4(), {\"key\": \"val\"}, \"the brown fox\", [1.0, 1.2])\n",
17211766
" ])\n",
1722-
" assert False\n",
17231767
"except BaseException as e:\n",
1724-
" pass\n",
1768+
" raised = True\n",
1769+
"assert raised\n",
17251770
"\n",
17261771
"rec = vec.search([1.0, 2.0], filter={\"key_1\": \"val_1\", \"key_2\": \"val_2\"})\n",
17271772
"assert rec[0][SEARCH_RESULT_CONTENTS_IDX] == 'the brown fox'\n",
@@ -1765,6 +1810,27 @@
17651810
"vec.delete_by_ids([\"Not a valid UUID\"])\n",
17661811
"assert vec.table_is_empty()\n",
17671812
"vec.drop_table()\n",
1813+
"vec.close()\n",
1814+
"\n",
1815+
"vec = Sync(service_url, \"data_table\", 2, time_partition_interval=timedelta(seconds=60))\n",
1816+
"vec.create_tables()\n",
1817+
"assert vec.table_is_empty()\n",
1818+
"id = uuid.uuid1()\n",
1819+
"vec.upsert([(id, {\"key\": \"val\"}, \"the brown fox\", [1.0, 1.2])])\n",
1820+
"assert not vec.table_is_empty()\n",
1821+
"vec.delete_by_ids([id])\n",
1822+
"assert vec.table_is_empty()\n",
1823+
"raised = False\n",
1824+
"try:\n",
1825+
" # can't upsert with uuid type 4 in time partitioned table\n",
1826+
" vec.upsert([\n",
1827+
" (uuid.uuid4(), {\"key\": \"val\"}, \"the brown fox\", [1.0, 1.2])\n",
1828+
" ])\n",
1829+
" #pass\n",
1830+
"except BaseException as e:\n",
1831+
" raised = True\n",
1832+
"assert raised\n",
1833+
"vec.drop_table()\n",
17681834
"vec.close()"
17691835
]
17701836
},

timescale_vector/_modidx.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@
3131
'timescale_vector/client.py'),
3232
'timescale_vector.client.Async.drop_table': ( 'vector.html#async.drop_table',
3333
'timescale_vector/client.py'),
34+
'timescale_vector.client.Async.munge_record': ( 'vector.html#async.munge_record',
35+
'timescale_vector/client.py'),
3436
'timescale_vector.client.Async.search': ('vector.html#async.search', 'timescale_vector/client.py'),
3537
'timescale_vector.client.Async.table_is_empty': ( 'vector.html#async.table_is_empty',
3638
'timescale_vector/client.py'),
@@ -112,6 +114,8 @@
112114
'timescale_vector/client.py'),
113115
'timescale_vector.client.Sync.drop_table': ( 'vector.html#sync.drop_table',
114116
'timescale_vector/client.py'),
117+
'timescale_vector.client.Sync.munge_record': ( 'vector.html#sync.munge_record',
118+
'timescale_vector/client.py'),
115119
'timescale_vector.client.Sync.search': ('vector.html#sync.search', 'timescale_vector/client.py'),
116120
'timescale_vector.client.Sync.table_is_empty': ( 'vector.html#sync.table_is_empty',
117121
'timescale_vector/client.py'),

0 commit comments

Comments
 (0)