Skip to content

Commit ed93c14

Browse files
authored
Last minor improvements for 2.1.0rc1 prerelease (#368)
* remove redundant serdes options in tests * reduced unused clutter in settings serdes options in unit tests * reduced unused clutter in settings serdes options in integration tests * adjust a silly typo in the udt async IT * ensure binary-encoded vectors are tested in all places for tables * all of the collection write path uses binary-encoded vectors now. Added IT * Added support for a 'definition' parameter in all create-index table methods, +full IT * version changed to 2.1.0rc1 * type-correct create-index-call-pattern tests now * other minor test-related fixes and improvements
1 parent f692289 commit ed93c14

23 files changed

+1916
-1298
lines changed

CHANGES

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
main
2-
====
1+
v 2.1.0rc1
2+
==========
33
Switch default encode_maps_as_lists_in_tables to "DATAAPIMAPS" (no observable changes w.r.t. previous releases)
44
Bugfix: fix payload encoding for empty maps/DataAPIMaps
55
User-defined types in tables:
@@ -8,6 +8,7 @@ User-defined types in tables:
88
- full support for UDTs in table ddl: `Database.create_table()`/`Database.list_tables()`/`table.definition()`
99
- UDT support in tables: introduced `serializer_by_class` and `deserializer_by_udt` serdes options
1010
- UDT support in tables: introduced `DataAPIDictUDT` default dict-wrapper type
11+
Tables, all index creation methods support a `definition` parameter, as an alternative to the "column[+options]" call pattern
1112
Tables, support for "text indexes":
1213
- classes `TableTextIndexDefinition` and `TableTextIndexOptions` (e.g. for `list_indexes()` method)
1314
- tables got a `.create_text_index()` method

astrapy/data/table.py

Lines changed: 289 additions & 47 deletions
Large diffs are not rendered by default.

astrapy/data/utils/collection_converters.py

Lines changed: 5 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -67,24 +67,17 @@ def preprocess_collection_payload_value(
6767
or isinstance(_value, DataAPIVector)
6868
):
6969
_value = convert_vector_to_floats(_value)
70-
# now _value is either a list or a DataAPIVector.
71-
# can/should it be binary-encoded?
72-
can_bin_encode = path[0] in {"insertOne", "insertMany"}
73-
# will it be bin-encoded?
70+
# now _value is either a list or a DataAPIVector. Check for binary-encoding:
7471
if isinstance(_value, DataAPIVector):
75-
# if I can, I will
76-
if can_bin_encode and options.binary_encode_vectors:
72+
# Binary-encode if serdes options allow it
73+
if options.binary_encode_vectors:
7774
return convert_to_ejson_bytes(_value.to_bytes())
7875
else:
7976
# back to a regular list
8077
return _value.data
8178
else:
82-
# this is a list. Encode if serdes options allow it
83-
if (
84-
can_bin_encode
85-
and options.binary_encode_vectors
86-
and isinstance(_value, list)
87-
):
79+
# if this is a list, encode if serdes options allow it:
80+
if options.binary_encode_vectors and isinstance(_value, list):
8881
return convert_to_ejson_bytes(DataAPIVector(_value).to_bytes())
8982
else:
9083
return _value

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
[project]
22
requires-python = ">=3.8,<4.0"
33
name = "astrapy"
4-
version = "2.0.1"
4+
version = "2.1.0rc1"
55
description = "A Python client for the Data API on DataStax Astra DB"
66
authors = [
77
{"name" = "Stefano Lottini", "email" = "[email protected]"},

tests/base/collection_decimal_support_assets.py

Lines changed: 2 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -20,26 +20,10 @@
2020
from astrapy.utils.api_options import SerdesOptions, defaultSerdesOptions
2121

2222
S_OPTS_NO_DECS = defaultSerdesOptions.with_override(
23-
SerdesOptions(
24-
binary_encode_vectors=False,
25-
custom_datatypes_in_reading=True,
26-
unroll_iterables_to_lists=True,
27-
use_decimals_in_collections=False,
28-
encode_maps_as_lists_in_tables="NEVER",
29-
accept_naive_datetimes=False,
30-
datetime_tzinfo=None,
31-
),
23+
SerdesOptions(use_decimals_in_collections=False),
3224
)
3325
S_OPTS_OK_DECS = defaultSerdesOptions.with_override(
34-
SerdesOptions(
35-
binary_encode_vectors=False,
36-
custom_datatypes_in_reading=True,
37-
unroll_iterables_to_lists=True,
38-
use_decimals_in_collections=True,
39-
encode_maps_as_lists_in_tables="NEVER",
40-
accept_naive_datetimes=False,
41-
datetime_tzinfo=None,
42-
),
26+
SerdesOptions(use_decimals_in_collections=True),
4327
)
4428
_BASELINE_SCALAR_CASES = {
4529
"_id": "baseline",

tests/base/integration/collections/test_collection_dml_async.py

Lines changed: 129 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -259,16 +259,12 @@ async def test_collection_vector_insertion_options_async(
259259

260260
acollection_Ycc = async_empty_collection.with_options(
261261
api_options=APIOptions(
262-
serdes_options=SerdesOptions(
263-
custom_datatypes_in_reading=True,
264-
),
262+
serdes_options=SerdesOptions(custom_datatypes_in_reading=True),
265263
),
266264
)
267265
acollection_Ncc = async_empty_collection.with_options(
268266
api_options=APIOptions(
269-
serdes_options=SerdesOptions(
270-
custom_datatypes_in_reading=False,
271-
),
267+
serdes_options=SerdesOptions(custom_datatypes_in_reading=False),
272268
),
273269
)
274270
docs_Ycc = [
@@ -723,9 +719,7 @@ async def test_collection_distinct_nonhashable_async(
723719

724720
d_items_noncustom = await acol.with_options(
725721
api_options=APIOptions(
726-
serdes_options=SerdesOptions(
727-
custom_datatypes_in_reading=False,
728-
)
722+
serdes_options=SerdesOptions(custom_datatypes_in_reading=False)
729723
),
730724
).distinct("f")
731725
assert len(d_items_noncustom) == 8
@@ -1239,16 +1233,12 @@ async def test_custom_datatypes_in_reading_async(
12391233
) -> None:
12401234
acol_standard_dtypes = async_empty_collection.with_options(
12411235
api_options=APIOptions(
1242-
serdes_options=SerdesOptions(
1243-
custom_datatypes_in_reading=False,
1244-
),
1236+
serdes_options=SerdesOptions(custom_datatypes_in_reading=False),
12451237
),
12461238
)
12471239
acol_custom_dtypes = async_empty_collection.with_options(
12481240
api_options=APIOptions(
1249-
serdes_options=SerdesOptions(
1250-
custom_datatypes_in_reading=True,
1251-
),
1241+
serdes_options=SerdesOptions(custom_datatypes_in_reading=True),
12521242
),
12531243
)
12541244
the_dtime = datetime(2000, 1, 1, 10, 11, 12, 123000, tzinfo=timezone.utc)
@@ -2004,3 +1994,127 @@ async def test_collection_datatype_insertability_async(
20041994
)
20051995
< one_day_ms
20061996
)
1997+
1998+
@pytest.mark.describe(
1999+
"test of collection binary-encoding vectors everywhere, async"
2000+
)
2001+
async def test_collection_binencvectors_everywhere_async(
2002+
self,
2003+
async_empty_collection: DefaultAsyncCollection,
2004+
) -> None:
2005+
binenc_options = APIOptions(
2006+
serdes_options=SerdesOptions(binary_encode_vectors=True),
2007+
)
2008+
binenc_acoll = async_empty_collection.with_options(api_options=binenc_options)
2009+
2010+
# Using DataAPIVector (the serializer binary-encodes all of these)
2011+
await binenc_acoll.insert_one({"_id": "0", "$vector": DataAPIVector([1, 1])})
2012+
await binenc_acoll.insert_many(
2013+
[{"_id": "X1", "$vector": DataAPIVector([1, 1])}]
2014+
)
2015+
await binenc_acoll.update_one(
2016+
{"_id": "0"}, {"$set": {"$vector": DataAPIVector([0, 1])}}, upsert=True
2017+
)
2018+
await binenc_acoll.update_one(
2019+
{"_id": "X2"},
2020+
{"$setOnInsert": {"$vector": DataAPIVector([0, 1])}},
2021+
upsert=True,
2022+
)
2023+
await binenc_acoll.update_one(
2024+
{},
2025+
sort={"$vector": DataAPIVector([-1, 0])},
2026+
update={"$set": {"oc": "ooo0"}},
2027+
upsert=True,
2028+
)
2029+
await binenc_acoll.update_many(
2030+
{"_id": "0"}, {"$set": {"$vector": DataAPIVector([0, 1])}}, upsert=True
2031+
)
2032+
await binenc_acoll.update_many(
2033+
{"_id": "X3"},
2034+
{"$setOnInsert": {"$vector": DataAPIVector([0, 1])}},
2035+
upsert=True,
2036+
)
2037+
await binenc_acoll.replace_one(
2038+
{"_id": "0"}, {"$vector": DataAPIVector([1, 2])}, upsert=True
2039+
)
2040+
await binenc_acoll.replace_one(
2041+
{}, {"oc": "ooo1"}, sort={"$vector": DataAPIVector([4, 1])}, upsert=True
2042+
)
2043+
await binenc_acoll.delete_one({}, sort={"$vector": DataAPIVector([5, 4])})
2044+
await binenc_acoll.find_one(sort={"$vector": DataAPIVector([5, 4])})
2045+
await binenc_acoll.find(sort={"$vector": DataAPIVector([5, 4])}).to_list()
2046+
await binenc_acoll.find_and_rerank(
2047+
{},
2048+
sort={"$hybrid": {"$vector": DataAPIVector([-1, -2]), "$lexical": "bla"}},
2049+
rerank_query="bla",
2050+
rerank_on="$lexical",
2051+
).to_list()
2052+
await binenc_acoll.find_one_and_replace(
2053+
{}, {"$vector": DataAPIVector([5, 4])}, upsert=True
2054+
)
2055+
await binenc_acoll.find_one_and_replace(
2056+
{}, {"oc": "ooo2"}, sort={"$vector": DataAPIVector([9, 1])}, upsert=True
2057+
)
2058+
await binenc_acoll.find_one_and_delete(
2059+
{}, sort={"$vector": DataAPIVector([9, 1])}
2060+
)
2061+
await binenc_acoll.find_one_and_update(
2062+
{"_id": "0"}, {"$set": {"$vector": DataAPIVector([5, 2])}}, upsert=True
2063+
)
2064+
await binenc_acoll.find_one_and_update(
2065+
{"_id": "X4"},
2066+
{"$setOnInsert": {"$vector": DataAPIVector([5, 2])}},
2067+
upsert=True,
2068+
)
2069+
await binenc_acoll.find_one_and_update(
2070+
{},
2071+
{"$set": {"oc": "ooo3"}},
2072+
sort={"$vector": DataAPIVector([7, 1])},
2073+
upsert=True,
2074+
)
2075+
2076+
# Using a plain list (the serializer binary-encodes all of these anyway)
2077+
await binenc_acoll.insert_one({"_id": "L0", "$vector": [1, 1]})
2078+
await binenc_acoll.insert_many([{"_id": "LX1", "$vector": [1, 1]}])
2079+
await binenc_acoll.update_one(
2080+
{"_id": "L0"}, {"$set": {"$vector": [0, 1]}}, upsert=True
2081+
)
2082+
await binenc_acoll.update_one(
2083+
{"_id": "LX2"}, {"$setOnInsert": {"$vector": [0, 1]}}, upsert=True
2084+
)
2085+
await binenc_acoll.update_one(
2086+
{}, sort={"$vector": [-1, 0]}, update={"$set": {"oc": "qqq0"}}, upsert=True
2087+
)
2088+
await binenc_acoll.update_many(
2089+
{"_id": "L0"}, {"$set": {"$vector": [0, 1]}}, upsert=True
2090+
)
2091+
await binenc_acoll.update_many(
2092+
{"_id": "LX3"}, {"$setOnInsert": {"$vector": [0, 1]}}, upsert=True
2093+
)
2094+
await binenc_acoll.replace_one({"_id": "L0"}, {"$vector": [1, 2]}, upsert=True)
2095+
await binenc_acoll.replace_one(
2096+
{}, {"oc": "qqq1"}, sort={"$vector": [4, 1]}, upsert=True
2097+
)
2098+
await binenc_acoll.delete_one({}, sort={"$vector": [5, 4]})
2099+
await binenc_acoll.find_one(sort={"$vector": [5, 4]})
2100+
await binenc_acoll.find(sort={"$vector": [5, 4]}).to_list()
2101+
await binenc_acoll.find_and_rerank(
2102+
{},
2103+
sort={"$hybrid": {"$vector": [-1, -2], "$lexical": "bla"}},
2104+
rerank_query="bla",
2105+
rerank_on="$lexical",
2106+
).to_list()
2107+
await binenc_acoll.find_one_and_replace({}, {"$vector": [5, 4]}, upsert=True)
2108+
await binenc_acoll.find_one_and_replace(
2109+
{}, {"oc": "qqq2"}, sort={"$vector": [9, 1]}, upsert=True
2110+
)
2111+
await binenc_acoll.find_one_and_delete({}, sort={"$vector": [9, 1]})
2112+
await binenc_acoll.find_one_and_update(
2113+
{"_id": "L0"}, {"$set": {"$vector": [5, 2]}}, upsert=True
2114+
)
2115+
await binenc_acoll.find_one_and_update(
2116+
{"_id": "LX4"}, {"$setOnInsert": {"$vector": [5, 2]}}, upsert=True
2117+
)
2118+
await binenc_acoll.find_one_and_update(
2119+
{}, {"$set": {"oc": "qqq3"}}, sort={"$vector": [7, 1]}, upsert=True
2120+
)

0 commit comments

Comments
 (0)