Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 45 additions & 2 deletions pymilvus/client/entity_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,27 @@ def convert_to_array(obj: List[Any], field_info: Any):
)


def _convert_to_vector_bytes(field_value: Any, element_type: DataType) -> bytes:
"""Convert a single vector value to bytes for byte-based vector types."""
if isinstance(field_value, bytes):
return field_value
if isinstance(field_value, np.ndarray):
expected_dtypes = {
DataType.FLOAT16_VECTOR: "float16",
DataType.BFLOAT16_VECTOR: "bfloat16",
DataType.INT8_VECTOR: "int8",
}
expected = expected_dtypes.get(element_type)
if expected and field_value.dtype != expected:
raise ParamError(
message=f"invalid input for {expected} vector. Expected an np.ndarray with dtype={expected}"
)
return field_value.view(np.uint8).tobytes()
raise ParamError(
message=f"invalid input type for {element_type.name} vector. Expected bytes or np.ndarray"
)


def convert_to_array_of_vector(obj: List[Any], field_info: Any):
# Create a single VectorField that contains all vectors flattened
field_data = schema_types.VectorField()
Expand All @@ -345,9 +366,31 @@ def convert_to_array_of_vector(obj: List[Any], field_info: Any):
f_value = field_value.tolist()
field_data.float_vector.data.extend(f_value)

elif element_type in (DataType.FLOAT16_VECTOR, DataType.BFLOAT16_VECTOR):
all_bytes = b""
for field_value in obj:
all_bytes += _convert_to_vector_bytes(field_value, element_type)
if element_type == DataType.FLOAT16_VECTOR:
field_data.float16_vector = all_bytes
else:
field_data.bfloat16_vector = all_bytes

elif element_type == DataType.INT8_VECTOR:
all_bytes = b""
for field_value in obj:
all_bytes += _convert_to_vector_bytes(field_value, element_type)
field_data.int8_vector = all_bytes

elif element_type == DataType.BINARY_VECTOR:
all_bytes = b""
for field_value in obj:
if isinstance(field_value, bytes):
all_bytes += field_value
else:
all_bytes += bytes(field_value)
field_data.binary_vector = all_bytes

else:
# TODO(SpadeA): the remaining element types are not supported yet. When support is added,
# make sure an empty array is handled correctly.
raise ParamError(
message=f"Unsupported element type: {element_type} for Array of Vector field: {field_info.get('name')}"
)
Expand Down
145 changes: 144 additions & 1 deletion tests/entity_helper/test_array_operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -278,16 +278,159 @@ def test_convert_array_of_float_vectors_numpy(self):
result = convert_to_array_of_vector(vectors, field_info)
assert list(result.float_vector.data) == [1.0, 2.0, 3.0, 4.0]

def test_convert_array_of_float16_vectors(self):
    """Float16 ndarrays end up concatenated in ``float16_vector``."""
    vectors = [
        np.array([1.0, 2.0], dtype=np.float16),
        np.array([3.0, 4.0], dtype=np.float16),
    ]
    field_info = {
        "name": "vec_arr_field",
        "element_type": DataType.FLOAT16_VECTOR,
        "params": {"dim": 2},
    }

    result = convert_to_array_of_vector(vectors, field_info)

    assert result.dim == 2
    # The expected payload is the raw bytes of each vector, in input order.
    assert result.float16_vector == b"".join(v.view(np.uint8).tobytes() for v in vectors)

def test_convert_array_of_float16_vectors_bytes(self):
    """Float16 vectors supplied as raw bytes are concatenated as given."""
    raw_vectors = [
        np.array(values, dtype=np.float16).view(np.uint8).tobytes()
        for values in ([1.0, 2.0], [3.0, 4.0])
    ]
    field_info = {
        "name": "vec_arr_field",
        "element_type": DataType.FLOAT16_VECTOR,
        "params": {"dim": 2},
    }

    result = convert_to_array_of_vector(raw_vectors, field_info)

    assert result.float16_vector == b"".join(raw_vectors)

def test_convert_empty_array_of_float16_vectors(self):
    """An empty input list keeps the configured dim and yields empty float16 bytes."""
    dim = 4
    field_info = {
        "name": "vec_arr_field",
        "element_type": DataType.FLOAT16_VECTOR,
        "params": {"dim": dim},
    }

    result = convert_to_array_of_vector([], field_info)

    assert result.dim == dim
    assert result.float16_vector == b""

def test_convert_array_of_bfloat16_vectors(self):
    """Bfloat16 vectors supplied as raw bytes are concatenated in ``bfloat16_vector``."""
    # Raw bytes are used because bfloat16 may not be available in every numpy version.
    # Each entry is 4 bytes, i.e. one dim=2 vector.
    raw_vectors = [b"\x00\x3f\x00\x40", b"\x00\x41\x00\x42"]
    field_info = {
        "name": "vec_arr_field",
        "element_type": DataType.BFLOAT16_VECTOR,
        "params": {"dim": 2},
    }

    result = convert_to_array_of_vector(raw_vectors, field_info)

    assert result.dim == 2
    assert result.bfloat16_vector == raw_vectors[0] + raw_vectors[1]

def test_convert_empty_array_of_bfloat16_vectors(self):
    """An empty input list keeps the configured dim and yields empty bfloat16 bytes."""
    dim = 4
    field_info = {
        "name": "vec_arr_field",
        "element_type": DataType.BFLOAT16_VECTOR,
        "params": {"dim": dim},
    }

    result = convert_to_array_of_vector([], field_info)

    assert result.dim == dim
    assert result.bfloat16_vector == b""

def test_convert_array_of_int8_vectors(self):
    """Int8 ndarrays end up concatenated in ``int8_vector``."""
    vectors = [
        np.array([1, -2, 3], dtype=np.int8),
        np.array([4, -5, 6], dtype=np.int8),
    ]
    field_info = {
        "name": "vec_arr_field",
        "element_type": DataType.INT8_VECTOR,
        "params": {"dim": 3},
    }

    result = convert_to_array_of_vector(vectors, field_info)

    assert result.dim == 3
    # The expected payload is the raw bytes of each vector, in input order.
    assert result.int8_vector == b"".join(v.view(np.uint8).tobytes() for v in vectors)

def test_convert_empty_array_of_int8_vectors(self):
    """An empty input list keeps the configured dim and yields empty int8 bytes."""
    dim = 3
    field_info = {
        "name": "vec_arr_field",
        "element_type": DataType.INT8_VECTOR,
        "params": {"dim": dim},
    }

    result = convert_to_array_of_vector([], field_info)

    assert result.dim == dim
    assert result.int8_vector == b""

def test_convert_array_of_binary_vectors(self):
    """Binary vectors supplied as bytes are concatenated in ``binary_vector``."""
    packed_vectors = [b"\xff", b"\x0f"]
    field_info = {
        "name": "vec_arr_field",
        "element_type": DataType.BINARY_VECTOR,
        "params": {"dim": 8},
    }

    result = convert_to_array_of_vector(packed_vectors, field_info)

    assert result.dim == 8
    assert result.binary_vector == b"\xff\x0f"

def test_convert_empty_array_of_binary_vectors(self):
    """An empty input list keeps the configured dim and yields empty binary bytes."""
    dim = 16
    field_info = {
        "name": "vec_arr_field",
        "element_type": DataType.BINARY_VECTOR,
        "params": {"dim": dim},
    }

    result = convert_to_array_of_vector([], field_info)

    assert result.dim == dim
    assert result.binary_vector == b""

def test_convert_array_of_vector_unsupported_type(self):
    """Test unsupported element type raises error"""
    # Only one "element_type" entry is kept: a duplicate key in a dict literal is
    # silently overwritten by the later one, so the earlier entry was dead.
    field_info = {
        "name": "vec_arr_field",
        "element_type": DataType.SPARSE_FLOAT_VECTOR,
        "params": {"dim": 8},
    }
    with pytest.raises(ParamError, match="Unsupported element type"):
        convert_to_array_of_vector([[1, 2]], field_info)

def test_convert_array_of_float16_vectors_invalid_dtype(self):
    """A float32 ndarray is rejected when the element type is FLOAT16_VECTOR."""
    wrong_dtype_vectors = [np.array([1.0, 2.0], dtype=np.float32)]
    field_info = {
        "name": "vec_arr_field",
        "element_type": DataType.FLOAT16_VECTOR,
        "params": {"dim": 2},
    }

    with pytest.raises(ParamError, match="invalid input for float16 vector"):
        convert_to_array_of_vector(wrong_dtype_vectors, field_info)

def test_convert_array_of_int8_vectors_invalid_dtype(self):
    """An int32 ndarray is rejected when the element type is INT8_VECTOR."""
    wrong_dtype_vectors = [np.array([1, 2], dtype=np.int32)]
    field_info = {
        "name": "vec_arr_field",
        "element_type": DataType.INT8_VECTOR,
        "params": {"dim": 2},
    }

    with pytest.raises(ParamError, match="invalid input for int8 vector"):
        convert_to_array_of_vector(wrong_dtype_vectors, field_info)

def test_convert_array_of_float16_vectors_invalid_type(self):
    """A plain Python list (neither bytes nor ndarray) is rejected for float16."""
    plain_list_vectors = [[1.0, 2.0]]
    field_info = {
        "name": "vec_arr_field",
        "element_type": DataType.FLOAT16_VECTOR,
        "params": {"dim": 2},
    }

    with pytest.raises(ParamError, match="invalid input type"):
        convert_to_array_of_vector(plain_list_vectors, field_info)

def test_convert_array_of_float_vectors_invalid_dtype(self):
"""Test numpy array with invalid dtype raises error"""
field_info = {
Expand Down
16 changes: 8 additions & 8 deletions tests/orm/test_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -1537,9 +1537,9 @@ def test_check_insert_schema_auto_id_with_data(self):
FieldSchema("vec", DataType.FLOAT_VECTOR, dim=2),
]
)
df = pd.DataFrame({"id": [1, 2], "vec": [[1.0, 2.0], [3.0, 4.0]]})
insert_data = pd.DataFrame({"id": [1, 2], "vec": [[1.0, 2.0], [3.0, 4.0]]})
with pytest.raises(DataNotMatchException, match="auto_id"):
check_insert_schema(schema, df)
check_insert_schema(schema, insert_data)


class TestCheckUpsertSchema:
Expand All @@ -1552,9 +1552,9 @@ def test_check_upsert_schema_none(self):

def test_check_upsert_schema_missing_pk(self, basic_schema):
    """Test upsert with missing primary key in DataFrame."""
    # The frame carries only the "vec" column.
    upsert_data = pd.DataFrame({"vec": [[1.0] * 128, [2.0] * 128]})
    with pytest.raises(DataNotMatchException, match="pk"):
        check_upsert_schema(basic_schema, upsert_data)


class TestCheckSchema:
Expand Down Expand Up @@ -1588,9 +1588,9 @@ class TestPrepareFieldsFromDataframe:

def test_empty_dataframe_unknown_dtype(self):
    """Test with empty DataFrame having unknown dtype."""
    # An empty object-dtype column gives no values to infer a field type from.
    empty_frame = pd.DataFrame({"col": pd.array([], dtype=object)})
    with pytest.raises(CannotInferSchemaException):
        prepare_fields_from_dataframe(empty_frame)

@pytest.mark.parametrize(
"vec_data,expected_dtype",
Expand All @@ -1605,8 +1605,8 @@ def test_empty_dataframe_unknown_dtype(self):
)
def test_dataframe_with_vectors(self, vec_data, expected_dtype):
    """Test DataFrame with vector columns."""
    # One row: a scalar id plus the parametrized vector value.
    vec_frame = pd.DataFrame({"id": [1], "vec": [vec_data]})
    _, data_types, params = prepare_fields_from_dataframe(vec_frame)
    assert expected_dtype in data_types
    assert "vec" in params

Expand Down