Skip to content

Last minor improvements for 2.1.0rc1 prerelease #368

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Jul 25, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions CHANGES
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
main
====
v 2.1.0rc1
==========
Switch default encode_maps_as_lists_in_tables to "DATAAPIMAPS" (no observable changes w.r.t. previous releases)
Bugfix: fix payload encoding for empty maps/DataAPIMaps
User-defined types in tables:
Expand All @@ -8,6 +8,7 @@ User-defined types in tables:
- full support for UDTs in table ddl: `Database.create_table()`/`Database.list_tables()`/`table.definition()`
- UDT support in tables: introduced `serializer_by_class` and `deserializer_by_udt` serdes options
- UDT support in tables: introduced `DataAPIDictUDT` default dict-wrapper type
Tables, all index creation methods support a `definition` parameter, as an alternative to the "column[+options]" call pattern
Tables, support for "text indexes":
- classes `TableTextIndexDefinition` and `TableTextIndexOptions` (e.g. for `list_indexes()` method)
- tables got a `.create_text_index()` method
Expand Down
336 changes: 289 additions & 47 deletions astrapy/data/table.py

Large diffs are not rendered by default.

17 changes: 5 additions & 12 deletions astrapy/data/utils/collection_converters.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,24 +67,17 @@ def preprocess_collection_payload_value(
or isinstance(_value, DataAPIVector)
):
_value = convert_vector_to_floats(_value)
# now _value is either a list or a DataAPIVector.
# can/should it be binary-encoded?
can_bin_encode = path[0] in {"insertOne", "insertMany"}
# will it be bin-encoded?
# now _value is either a list or a DataAPIVector. Check for binary-encoding:
if isinstance(_value, DataAPIVector):
# if I can, I will
if can_bin_encode and options.binary_encode_vectors:
# Binary-encode if serdes options allow it
if options.binary_encode_vectors:
return convert_to_ejson_bytes(_value.to_bytes())
else:
# back to a regular list
return _value.data
else:
# this is a list. Encode if serdes options allow it
if (
can_bin_encode
and options.binary_encode_vectors
and isinstance(_value, list)
):
# if this is a list, encode if serdes options allow it:
if options.binary_encode_vectors and isinstance(_value, list):
return convert_to_ejson_bytes(DataAPIVector(_value).to_bytes())
else:
return _value
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[project]
requires-python = ">=3.8,<4.0"
name = "astrapy"
version = "2.0.1"
version = "2.1.0rc1"
description = "A Python client for the Data API on DataStax Astra DB"
authors = [
{"name" = "Stefano Lottini", "email" = "[email protected]"},
Expand Down
20 changes: 2 additions & 18 deletions tests/base/collection_decimal_support_assets.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,26 +20,10 @@
from astrapy.utils.api_options import SerdesOptions, defaultSerdesOptions

S_OPTS_NO_DECS = defaultSerdesOptions.with_override(
SerdesOptions(
binary_encode_vectors=False,
custom_datatypes_in_reading=True,
unroll_iterables_to_lists=True,
use_decimals_in_collections=False,
encode_maps_as_lists_in_tables="NEVER",
accept_naive_datetimes=False,
datetime_tzinfo=None,
),
SerdesOptions(use_decimals_in_collections=False),
)
S_OPTS_OK_DECS = defaultSerdesOptions.with_override(
SerdesOptions(
binary_encode_vectors=False,
custom_datatypes_in_reading=True,
unroll_iterables_to_lists=True,
use_decimals_in_collections=True,
encode_maps_as_lists_in_tables="NEVER",
accept_naive_datetimes=False,
datetime_tzinfo=None,
),
SerdesOptions(use_decimals_in_collections=True),
)
_BASELINE_SCALAR_CASES = {
"_id": "baseline",
Expand Down
144 changes: 129 additions & 15 deletions tests/base/integration/collections/test_collection_dml_async.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,16 +259,12 @@ async def test_collection_vector_insertion_options_async(

acollection_Ycc = async_empty_collection.with_options(
api_options=APIOptions(
serdes_options=SerdesOptions(
custom_datatypes_in_reading=True,
),
serdes_options=SerdesOptions(custom_datatypes_in_reading=True),
),
)
acollection_Ncc = async_empty_collection.with_options(
api_options=APIOptions(
serdes_options=SerdesOptions(
custom_datatypes_in_reading=False,
),
serdes_options=SerdesOptions(custom_datatypes_in_reading=False),
),
)
docs_Ycc = [
Expand Down Expand Up @@ -723,9 +719,7 @@ async def test_collection_distinct_nonhashable_async(

d_items_noncustom = await acol.with_options(
api_options=APIOptions(
serdes_options=SerdesOptions(
custom_datatypes_in_reading=False,
)
serdes_options=SerdesOptions(custom_datatypes_in_reading=False)
),
).distinct("f")
assert len(d_items_noncustom) == 8
Expand Down Expand Up @@ -1239,16 +1233,12 @@ async def test_custom_datatypes_in_reading_async(
) -> None:
acol_standard_dtypes = async_empty_collection.with_options(
api_options=APIOptions(
serdes_options=SerdesOptions(
custom_datatypes_in_reading=False,
),
serdes_options=SerdesOptions(custom_datatypes_in_reading=False),
),
)
acol_custom_dtypes = async_empty_collection.with_options(
api_options=APIOptions(
serdes_options=SerdesOptions(
custom_datatypes_in_reading=True,
),
serdes_options=SerdesOptions(custom_datatypes_in_reading=True),
),
)
the_dtime = datetime(2000, 1, 1, 10, 11, 12, 123000, tzinfo=timezone.utc)
Expand Down Expand Up @@ -2004,3 +1994,127 @@ async def test_collection_datatype_insertability_async(
)
< one_day_ms
)

@pytest.mark.describe(
"test of collection binary-encoding vectors everywhere, async"
)
async def test_collection_binencvectors_everywhere_async(
self,
async_empty_collection: DefaultAsyncCollection,
) -> None:
# Smoke test: with `binary_encode_vectors=True` set in the serdes options,
# run a series of collection commands that carry a vector, first with
# `DataAPIVector` values and then with plain Python lists.
# Vectors appear in four positions across the calls below: as a document's
# "$vector" field, inside "$set"/"$setOnInsert" update payloads, inside a
# `sort` clause, and inside a "$hybrid" sort for find-and-rerank.
# There are no assertions: the test passes as long as no call raises.
# NOTE(review): statement order matters (later calls act on documents
# created/changed by earlier ones) -- keep the sequence as-is when editing.
binenc_options = APIOptions(
serdes_options=SerdesOptions(binary_encode_vectors=True),
)
# Derived collection handle that uses the binary-encoding serdes options.
binenc_acoll = async_empty_collection.with_options(api_options=binenc_options)

# Part 1: vectors passed as DataAPIVector
# (the serializer binary-encodes all of these)
await binenc_acoll.insert_one({"_id": "0", "$vector": DataAPIVector([1, 1])})
await binenc_acoll.insert_many(
[{"_id": "X1", "$vector": DataAPIVector([1, 1])}]
)
await binenc_acoll.update_one(
{"_id": "0"}, {"$set": {"$vector": DataAPIVector([0, 1])}}, upsert=True
)
await binenc_acoll.update_one(
{"_id": "X2"},
{"$setOnInsert": {"$vector": DataAPIVector([0, 1])}},
upsert=True,
)
await binenc_acoll.update_one(
{},
sort={"$vector": DataAPIVector([-1, 0])},
update={"$set": {"oc": "ooo0"}},
upsert=True,
)
await binenc_acoll.update_many(
{"_id": "0"}, {"$set": {"$vector": DataAPIVector([0, 1])}}, upsert=True
)
await binenc_acoll.update_many(
{"_id": "X3"},
{"$setOnInsert": {"$vector": DataAPIVector([0, 1])}},
upsert=True,
)
await binenc_acoll.replace_one(
{"_id": "0"}, {"$vector": DataAPIVector([1, 2])}, upsert=True
)
await binenc_acoll.replace_one(
{}, {"oc": "ooo1"}, sort={"$vector": DataAPIVector([4, 1])}, upsert=True
)
await binenc_acoll.delete_one({}, sort={"$vector": DataAPIVector([5, 4])})
await binenc_acoll.find_one(sort={"$vector": DataAPIVector([5, 4])})
await binenc_acoll.find(sort={"$vector": DataAPIVector([5, 4])}).to_list()
await binenc_acoll.find_and_rerank(
{},
sort={"$hybrid": {"$vector": DataAPIVector([-1, -2]), "$lexical": "bla"}},
rerank_query="bla",
rerank_on="$lexical",
).to_list()
await binenc_acoll.find_one_and_replace(
{}, {"$vector": DataAPIVector([5, 4])}, upsert=True
)
await binenc_acoll.find_one_and_replace(
{}, {"oc": "ooo2"}, sort={"$vector": DataAPIVector([9, 1])}, upsert=True
)
await binenc_acoll.find_one_and_delete(
{}, sort={"$vector": DataAPIVector([9, 1])}
)
await binenc_acoll.find_one_and_update(
{"_id": "0"}, {"$set": {"$vector": DataAPIVector([5, 2])}}, upsert=True
)
await binenc_acoll.find_one_and_update(
{"_id": "X4"},
{"$setOnInsert": {"$vector": DataAPIVector([5, 2])}},
upsert=True,
)
await binenc_acoll.find_one_and_update(
{},
{"$set": {"oc": "ooo3"}},
sort={"$vector": DataAPIVector([7, 1])},
upsert=True,
)

# Part 2: the same sequence of commands with vectors passed as plain lists,
# using distinct "_id"/"oc" values ("L..."/"qqq...")
# (the serializer binary-encodes all of these anyway)
await binenc_acoll.insert_one({"_id": "L0", "$vector": [1, 1]})
await binenc_acoll.insert_many([{"_id": "LX1", "$vector": [1, 1]}])
await binenc_acoll.update_one(
{"_id": "L0"}, {"$set": {"$vector": [0, 1]}}, upsert=True
)
await binenc_acoll.update_one(
{"_id": "LX2"}, {"$setOnInsert": {"$vector": [0, 1]}}, upsert=True
)
await binenc_acoll.update_one(
{}, sort={"$vector": [-1, 0]}, update={"$set": {"oc": "qqq0"}}, upsert=True
)
await binenc_acoll.update_many(
{"_id": "L0"}, {"$set": {"$vector": [0, 1]}}, upsert=True
)
await binenc_acoll.update_many(
{"_id": "LX3"}, {"$setOnInsert": {"$vector": [0, 1]}}, upsert=True
)
await binenc_acoll.replace_one({"_id": "L0"}, {"$vector": [1, 2]}, upsert=True)
await binenc_acoll.replace_one(
{}, {"oc": "qqq1"}, sort={"$vector": [4, 1]}, upsert=True
)
await binenc_acoll.delete_one({}, sort={"$vector": [5, 4]})
await binenc_acoll.find_one(sort={"$vector": [5, 4]})
await binenc_acoll.find(sort={"$vector": [5, 4]}).to_list()
await binenc_acoll.find_and_rerank(
{},
sort={"$hybrid": {"$vector": [-1, -2], "$lexical": "bla"}},
rerank_query="bla",
rerank_on="$lexical",
).to_list()
await binenc_acoll.find_one_and_replace({}, {"$vector": [5, 4]}, upsert=True)
await binenc_acoll.find_one_and_replace(
{}, {"oc": "qqq2"}, sort={"$vector": [9, 1]}, upsert=True
)
await binenc_acoll.find_one_and_delete({}, sort={"$vector": [9, 1]})
await binenc_acoll.find_one_and_update(
{"_id": "L0"}, {"$set": {"$vector": [5, 2]}}, upsert=True
)
await binenc_acoll.find_one_and_update(
{"_id": "LX4"}, {"$setOnInsert": {"$vector": [5, 2]}}, upsert=True
)
await binenc_acoll.find_one_and_update(
{}, {"$set": {"oc": "qqq3"}}, sort={"$vector": [7, 1]}, upsert=True
)
Loading