|
72 | 72 | "import json\n",
|
73 | 73 | "import numpy as np\n",
|
74 | 74 | "import math\n",
|
75 |
| - "from datetime import timedelta" |
| 75 | + "import random\n", |
| 76 | + "from datetime import timedelta\n", |
| 77 | + "from datetime import datetime\n", |
| 78 | + "import calendar" |
76 | 79 | ]
|
77 | 80 | },
|
78 | 81 | {
|
|
90 | 93 | "SEARCH_RESULT_DISTANCE_IDX = 4"
|
91 | 94 | ]
|
92 | 95 | },
|
| 96 | + { |
| 97 | + "cell_type": "code", |
| 98 | + "execution_count": null, |
| 99 | + "metadata": {}, |
| 100 | + "outputs": [], |
| 101 | + "source": [ |
| 102 | + "#| export\n", |
| 103 | + "\n", |
| 104 | + "#copied from Cassandra: https://docs.datastax.com/en/drivers/python/3.2/_modules/cassandra/util.html#uuid_from_time\n", |
def uuid_from_time(time_arg=None, node=None, clock_seq=None):
    """
    Converts a datetime or timestamp to a type 1 :class:`uuid.UUID`.

    When ``time_arg`` is None the call is delegated to
    ``uuid.uuid1(node, clock_seq)``.

    :param time_arg:
      The time to use for the timestamp portion of the UUID.
      This can either be a :class:`datetime` object or a timestamp
      in seconds (as returned from :meth:`time.time()`). Datetimes are
      converted through ``utctimetuple()``.
    :type datetime: :class:`datetime` or timestamp

    :param node:
      Node integer for the UUID (up to 48 bits). If not specified, this
      field is randomized.
    :type node: long

    :param clock_seq:
      Clock sequence field for the UUID (up to 14 bits). If not specified,
      a random sequence is generated.
    :type clock_seq: int

    :raises ValueError: if ``clock_seq`` is larger than 14 bits.

    :rtype: :class:`uuid.UUID`

    """
    if time_arg is None:
        return uuid.uuid1(node, clock_seq)

    if hasattr(time_arg, 'utctimetuple'):
        # Stay in integer arithmetic: the 100-ns interval count exceeds
        # 2**54 for dates after early 2027, where float math would round
        # the timestamp to a neighbouring value.
        seconds = calendar.timegm(time_arg.utctimetuple())
        microseconds = seconds * 1_000_000 + time_arg.microsecond
    else:
        microseconds = int(time_arg * 1e6)

    # 0x01b21dd213814000 is the number of 100-ns intervals between the
    # UUID epoch 1582-10-15 00:00:00 and the Unix epoch 1970-01-01 00:00:00.
    intervals = microseconds * 10 + 0x01b21dd213814000

    # Split the 60-bit timestamp into the three UUID time fields.
    time_low = intervals & 0xffffffff
    time_mid = (intervals >> 32) & 0xffff
    time_hi_version = (intervals >> 48) & 0x0fff

    if clock_seq is None:
        clock_seq = random.getrandbits(14)
    elif clock_seq > 0x3fff:
        raise ValueError('clock_seq is out of range (need a 14-bit value)')

    clock_seq_low = clock_seq & 0xff
    # 0x80 sets the two most significant bits to "10" (the UUID variant bits).
    clock_seq_hi_variant = 0x80 | ((clock_seq >> 8) & 0x3f)

    if node is None:
        node = random.getrandbits(48)

    return uuid.UUID(fields=(time_low, time_mid, time_hi_version,
                             clock_seq_hi_variant, clock_seq_low, node), version=1)
| 158 | + ] |
| 159 | + }, |
| 160 | + { |
| 161 | + "cell_type": "code", |
| 162 | + "execution_count": null, |
| 163 | + "metadata": {}, |
| 164 | + "outputs": [], |
| 165 | + "source": [ |
| 166 | + "#| export\n", |
class UUIDTimeRange:
    """
    A time range used to filter rows by the ``uuid_timestamp(id)`` SQL expression.

    At least one of ``start_date`` / ``end_date`` must be given; a missing
    bound leaves that side of the range open.
    """

    def __init__(self, start_date: Optional[datetime] = None, end_date: Optional[datetime] = None, start_inclusive=True, end_inclusive=False):
        """
        Parameters
        ----------
        start_date
            Lower bound of the range, or None for no lower bound.
        end_date
            Upper bound of the range, or None for no upper bound.
        start_inclusive
            Whether the lower bound is compared with ``>=`` (True) or ``>`` (False).
        end_inclusive
            Whether the upper bound is compared with ``<=`` (True) or ``<`` (False).

        Raises
        ------
        ValueError
            If both bounds are None, or if ``start_date`` is later than ``end_date``.
        """
        if start_date is None and end_date is None:
            raise ValueError("start_date and end_date cannot both be None")

        if start_date is not None and end_date is not None and start_date > end_date:
            raise ValueError("start_date must be before end_date")

        self.start_date = start_date
        self.end_date = end_date
        self.start_inclusive = start_inclusive
        self.end_inclusive = end_inclusive

    def build_query(self, params: List) -> Tuple[str, List]:
        """
        Build the WHERE-clause fragment for this range.

        Placeholders are numbered ``$N`` continuing after the entries already
        in ``params``; the bound values are appended to ``params`` in place.

        Parameters
        ----------
        params
            The query parameters collected so far. This list is mutated.

        Returns
        -------
        Tuple[str, List]: the conditions joined with " AND " and the same ``params`` list.
        """
        column = "uuid_timestamp(id)"
        queries = []
        if self.start_date is not None:
            operator = ">=" if self.start_inclusive else ">"
            queries.append(f"{column} {operator} ${len(params)+1}")
            params.append(self.start_date)
        if self.end_date is not None:
            operator = "<=" if self.end_inclusive else "<"
            queries.append(f"{column} {operator} ${len(params)+1}")
            params.append(self.end_date)
        return " AND ".join(queries), params
| 197 | + ] |
| 198 | + }, |
93 | 199 | {
|
94 | 200 | "cell_type": "code",
|
95 | 201 | "execution_count": null,
|
|
461 | 567 | "\n",
|
462 | 568 | " return (where, params)\n",
|
463 | 569 | "\n",
|
464 |
| - " def search_query(self, query_embedding: Optional[Union[List[float], np.ndarray]], limit: int = 10, filter: Optional[Union[Dict[str, str], List[Dict[str, str]]]] = None, predicates: Optional[Predicates] = None) -> Tuple[str, List]:\n", |
| 570 | + " def search_query(\n", |
| 571 | + " self, \n", |
| 572 | + " query_embedding: Optional[Union[List[float], np.ndarray]], \n", |
| 573 | + " limit: int = 10, \n", |
| 574 | + " filter: Optional[Union[Dict[str, str], List[Dict[str, str]]]] = None, \n", |
| 575 | + " predicates: Optional[Predicates] = None,\n", |
| 576 | + " uuid_time_filter: Optional[UUIDTimeRange] = None,\n", |
| 577 | + " ) -> Tuple[str, List]:\n", |
465 | 578 | " \"\"\"\n",
|
466 | 579 | " Generates a similarity query.\n",
|
467 | 580 | "\n",
|
|
487 | 600 | " if predicates is not None:\n",
|
488 | 601 | " (where_predicates, params) = predicates.build_query(params)\n",
|
489 | 602 | " where_clauses.append(where_predicates)\n",
|
| 603 | + "\n", |
| 604 | + " if uuid_time_filter is not None:\n", |
| 605 | + " if self.time_partition_interval is None:\n", |
| 606 | + " raise ValueError(\"\"\"uuid_time_filter is only supported when time_partitioning is enabled.\"\"\")\n", |
| 607 | + " \n", |
| 608 | + " (where_time, params) = uuid_time_filter.build_query(params)\n", |
| 609 | + " where_clauses.append(where_time)\n", |
490 | 610 | " \n",
|
491 | 611 | " if len(where_clauses) > 0:\n",
|
492 | 612 | " where = \" AND \".join(where_clauses)\n",
|
|
784 | 904 | " query_embedding: Optional[List[float]] = None, \n",
|
785 | 905 | " limit: int = 10,\n",
|
786 | 906 | " filter: Optional[Union[Dict[str, str], List[Dict[str, str]]]] = None,\n",
|
787 |
| - " predicates: Optional[Predicates] = None): \n", |
| 907 | + " predicates: Optional[Predicates] = None,\n", |
| 908 | + " uuid_time_filter: Optional[UUIDTimeRange] = None,\n", |
| 909 | + " ): \n", |
788 | 910 | " \"\"\"\n",
|
789 | 911 | " Retrieves similar records using a similarity query.\n",
|
790 | 912 | "\n",
|
|
804 | 926 | " List: List of similar records.\n",
|
805 | 927 | " \"\"\"\n",
|
806 | 928 | " (query, params) = self.builder.search_query(\n",
|
807 |
| - " query_embedding, limit, filter, predicates)\n", |
| 929 | + " query_embedding, limit, filter, predicates, uuid_time_filter)\n", |
808 | 930 | " async with await self.connect() as pool:\n",
|
809 | 931 | " return await pool.fetch(query, *params)"
|
810 | 932 | ]
|
|
892 | 1014 | "execution_count": null,
|
893 | 1015 | "metadata": {},
|
894 | 1016 | "outputs": [
|
| 1017 | + { |
| 1018 | + "name": "stderr", |
| 1019 | + "output_type": "stream", |
| 1020 | + "text": [ |
| 1021 | + "/Users/cevian/.pyenv/versions/3.11.4/envs/nbdev_env/lib/python3.11/site-packages/fastcore/docscrape.py:225: UserWarning: potentially wrong underline length... \n", |
| 1022 | + "Returns \n", |
| 1023 | + "-------- in \n", |
| 1024 | + "Retrieves similar records using a similarity query.\n", |
| 1025 | + "...\n", |
| 1026 | + " else: warn(msg)\n" |
| 1027 | + ] |
| 1028 | + }, |
895 | 1029 | {
|
896 | 1030 | "data": {
|
897 | 1031 | "text/markdown": [
|
|
903 | 1037 | "\n",
|
904 | 1038 | "> Async.search (query_embedding:Optional[List[float]]=None, limit:int=10,\n",
|
905 | 1039 | "> filter:Union[Dict[str,str],List[Dict[str,str]],NoneType]=No\n",
|
906 |
| - "> ne, predicates:Optional[__main__.Predicates]=None)\n", |
| 1040 | + "> ne, predicates:Optional[__main__.Predicates]=None,\n", |
| 1041 | + "> uuid_time_filter:Optional[__main__.UUIDTimeRange]=None)\n", |
907 | 1042 | "\n",
|
908 | 1043 | "Retrieves similar records using a similarity query.\n",
|
909 | 1044 | "\n",
|
|
913 | 1048 | "| limit | int | 10 | The number of nearest neighbors to retrieve. |\n",
|
914 | 1049 | "| filter | Union | None | A filter for metadata. Should be specified as a key-value object or a list of key-value objects (where any objects in the list are matched). |\n",
|
915 | 1050 | "| predicates | Optional | None | A Predicates object to filter the results. Predicates support more complex queries than the filter parameter. Predicates can be combined using logical operators (&, \\|, and ~). |\n",
|
| 1051 | + "| uuid_time_filter | Optional | None | |\n", |
916 | 1052 | "| **Returns** | **List: List of similar records.** | | |"
|
917 | 1053 | ],
|
918 | 1054 | "text/plain": [
|
|
924 | 1060 | "\n",
|
925 | 1061 | "> Async.search (query_embedding:Optional[List[float]]=None, limit:int=10,\n",
|
926 | 1062 | "> filter:Union[Dict[str,str],List[Dict[str,str]],NoneType]=No\n",
|
927 |
| - "> ne, predicates:Optional[__main__.Predicates]=None)\n", |
| 1063 | + "> ne, predicates:Optional[__main__.Predicates]=None,\n", |
| 1064 | + "> uuid_time_filter:Optional[__main__.UUIDTimeRange]=None)\n", |
928 | 1065 | "\n",
|
929 | 1066 | "Retrieves similar records using a similarity query.\n",
|
930 | 1067 | "\n",
|
|
934 | 1071 | "| limit | int | 10 | The number of nearest neighbors to retrieve. |\n",
|
935 | 1072 | "| filter | Union | None | A filter for metadata. Should be specified as a key-value object or a list of key-value objects (where any objects in the list are matched). |\n",
|
936 | 1073 | "| predicates | Optional | None | A Predicates object to filter the results. Predicates support more complex queries than the filter parameter. Predicates can be combined using logical operators (&, \\|, and ~). |\n",
|
| 1074 | + "| uuid_time_filter | Optional | None | |\n", |
937 | 1075 | "| **Returns** | **List: List of similar records.** | | |"
|
938 | 1076 | ]
|
939 | 1077 | },
|
|
1140 | 1278 | " raised = True\n",
|
1141 | 1279 | "assert raised\n",
|
1142 | 1280 | "\n",
|
| 1281 | + "import datetime\n", |
| 1282 | + "specific_datetime = datetime.datetime(2018, 8, 10, 15, 30, 0)\n", |
| 1283 | + "await vec.upsert([\n", |
| 1284 | + " # current time\n", |
| 1285 | + " (uuid.uuid1(), {\"key\": \"val\"}, \"the brown fox\", [1.0, 1.2]),\n", |
| 1286 | + " #time in 2018\n", |
| 1287 | + " (uuid_from_time(specific_datetime), {\"key\": \"val\"}, \"the brown fox\", [1.0, 1.2])\n", |
| 1288 | + "])\n", |
| 1289 | + "assert not await vec.table_is_empty()\n", |
| 1290 | + "rec = await vec.search([1.0, 2.0], limit=4, uuid_time_filter=UUIDTimeRange(specific_datetime-timedelta(days=7), specific_datetime+timedelta(days=7)))\n", |
| 1291 | + "assert len(rec) == 1\n", |
| 1292 | + "rec = await vec.search([1.0, 2.0], limit=4, uuid_time_filter=UUIDTimeRange(specific_datetime-timedelta(days=7), specific_datetime-timedelta(days=2)))\n", |
| 1293 | + "assert len(rec) == 0\n", |
| 1294 | + "rec = await vec.search([1.0, 2.0], limit=4, uuid_time_filter=UUIDTimeRange(specific_datetime-timedelta(days=7)))\n", |
| 1295 | + "assert len(rec) == 2\n", |
1143 | 1296 | "await vec.drop_table()\n",
|
1144 | 1297 | "await vec.close()"
|
1145 | 1298 | ]
|
|
1458 | 1611 | " query_embedding: Optional[List[float]] = None, \n",
|
1459 | 1612 | " limit: int = 10, \n",
|
1460 | 1613 | " filter: Optional[Union[Dict[str, str], List[Dict[str, str]]]] = None,\n",
|
1461 |
| - " predicates: Optional[Predicates] = None):\n", |
| 1614 | + " predicates: Optional[Predicates] = None,\n", |
| 1615 | + " uuid_time_filter: Optional[UUIDTimeRange] = None,\n", |
| 1616 | + " ):\n", |
1462 | 1617 | " \"\"\"\n",
|
1463 | 1618 | " Retrieves similar records using a similarity query.\n",
|
1464 | 1619 | "\n",
|
|
1483 | 1638 | " query_embedding_np = None\n",
|
1484 | 1639 | "\n",
|
1485 | 1640 | " (query, params) = self.builder.search_query(\n",
|
1486 |
| - " query_embedding_np, limit, filter, predicates)\n", |
| 1641 | + " query_embedding_np, limit, filter, predicates, uuid_time_filter)\n", |
1487 | 1642 | " query, params = self._translate_to_pyformat(query, params)\n",
|
1488 | 1643 | " with self.connect() as conn:\n",
|
1489 | 1644 | " with conn.cursor() as cur:\n",
|
|
1595 | 1750 | "\n",
|
1596 | 1751 | "> Sync.search (query_embedding:Optional[List[float]]=None, limit:int=10,\n",
|
1597 | 1752 | "> filter:Union[Dict[str,str],List[Dict[str,str]],NoneType]=Non\n",
|
1598 |
| - "> e, predicates:Optional[__main__.Predicates]=None)\n", |
| 1753 | + "> e, predicates:Optional[__main__.Predicates]=None,\n", |
| 1754 | + "> uuid_time_filter:Optional[__main__.UUIDTimeRange]=None)\n", |
1599 | 1755 | "\n",
|
1600 | 1756 | "Retrieves similar records using a similarity query.\n",
|
1601 | 1757 | "\n",
|
|
1605 | 1761 | "| limit | int | 10 | The number of nearest neighbors to retrieve. |\n",
|
1606 | 1762 | "| filter | Union | None | A filter for metadata. Should be specified as a key-value object or a list of key-value objects (where any objects in the list are matched). |\n",
|
1607 | 1763 | "| predicates | Optional | None | A Predicates object to filter the results. Predicates support more complex queries than the filter parameter. Predicates can be combined using logical operators (&, \\|, and ~). |\n",
|
| 1764 | + "| uuid_time_filter | Optional | None | |\n", |
1608 | 1765 | "| **Returns** | **List: List of similar records.** | | |"
|
1609 | 1766 | ],
|
1610 | 1767 | "text/plain": [
|
|
1616 | 1773 | "\n",
|
1617 | 1774 | "> Sync.search (query_embedding:Optional[List[float]]=None, limit:int=10,\n",
|
1618 | 1775 | "> filter:Union[Dict[str,str],List[Dict[str,str]],NoneType]=Non\n",
|
1619 |
| - "> e, predicates:Optional[__main__.Predicates]=None)\n", |
| 1776 | + "> e, predicates:Optional[__main__.Predicates]=None,\n", |
| 1777 | + "> uuid_time_filter:Optional[__main__.UUIDTimeRange]=None)\n", |
1620 | 1778 | "\n",
|
1621 | 1779 | "Retrieves similar records using a similarity query.\n",
|
1622 | 1780 | "\n",
|
|
1626 | 1784 | "| limit | int | 10 | The number of nearest neighbors to retrieve. |\n",
|
1627 | 1785 | "| filter | Union | None | A filter for metadata. Should be specified as a key-value object or a list of key-value objects (where any objects in the list are matched). |\n",
|
1628 | 1786 | "| predicates | Optional | None | A Predicates object to filter the results. Predicates support more complex queries than the filter parameter. Predicates can be combined using logical operators (&, \\|, and ~). |\n",
|
| 1787 | + "| uuid_time_filter | Optional | None | |\n", |
1629 | 1788 | "| **Returns** | **List: List of similar records.** | | |"
|
1630 | 1789 | ]
|
1631 | 1790 | },
|
|
1809 | 1968 | "except BaseException as e:\n",
|
1810 | 1969 | " raised = True\n",
|
1811 | 1970 | "assert raised\n",
|
| 1971 | + "\n", |
| 1972 | + "import datetime\n", |
| 1973 | + "specific_datetime = datetime.datetime(2018, 8, 10, 15, 30, 0)\n", |
| 1974 | + "vec.upsert([\n", |
| 1975 | + " # current time\n", |
| 1976 | + " (uuid.uuid1(), {\"key\": \"val\"}, \"the brown fox\", [1.0, 1.2]),\n", |
| 1977 | + " #time in 2018\n", |
| 1978 | + " (uuid_from_time(specific_datetime), {\"key\": \"val\"}, \"the brown fox\", [1.0, 1.2])\n", |
| 1979 | + "])\n", |
| 1980 | + "assert not vec.table_is_empty()\n", |
| 1981 | + "rec = vec.search([1.0, 2.0], limit=4, uuid_time_filter=UUIDTimeRange(specific_datetime-timedelta(days=7), specific_datetime+timedelta(days=7)))\n", |
| 1982 | + "assert len(rec) == 1\n", |
| 1983 | + "rec = vec.search([1.0, 2.0], limit=4, uuid_time_filter=UUIDTimeRange(specific_datetime-timedelta(days=7), specific_datetime-timedelta(days=2)))\n", |
| 1984 | + "assert len(rec) == 0\n", |
| 1985 | + "rec = vec.search([1.0, 2.0], limit=4, uuid_time_filter=UUIDTimeRange(specific_datetime-timedelta(days=7)))\n", |
| 1986 | + "assert len(rec) == 2\n", |
1812 | 1987 | "vec.drop_table()\n",
|
1813 | 1988 | "vec.close()"
|
1814 | 1989 | ]
|
|
0 commit comments