Skip to content

Commit 0159b0a

Browse files
feat: Support geo data type for insert, query and search (#2925) (#3043)
issue: #2924

Support geometry types for pymilvus.

This pull request adds support for the `GEOMETRY` data type throughout the bulk writing and search result handling code in PyMilvus. The main changes ensure that geometry data is correctly validated, encoded, decoded, and handled in all relevant utility and helper functions. Geometry values are stored as bytes (with string values encoded as UTF-8), and decoding is performed when reading results.

**Support for GEOMETRY data type**

* Added validation logic for `DataType.GEOMETRY` in `constants.py`, allowing geometry data to be either `str` or `bytes`.
* Implemented encoding of geometry values to bytes in `pack_field_value_to_field_data` and `entity_to_field_data`, handling both string (WKT) and bytes input. [[1]](diffhunk://#diff-0c1494647658a30477e2d780c6bc64f56ef49b14c79027428e3cd70bb491c3c5R449-R469) [[2]](diffhunk://#diff-0c1494647658a30477e2d780c6bc64f56ef49b14c79027428e3cd70bb491c3c5R560-R573)

**Geometry data decoding and assignment**

* Updated assignment and append logic in `entity_helper.py` to decode geometry bytes back to strings when possible, both in batch assignment and single row extraction. [[1]](diffhunk://#diff-0c1494647658a30477e2d780c6bc64f56ef49b14c79027428e3cd70bb491c3c5R760-R772) [[2]](diffhunk://#diff-0c1494647658a30477e2d780c6bc64f56ef49b14c79027428e3cd70bb491c3c5R863-R878)

**Search result integration**

* Added `GEOMETRY` to supported types in search result parsing and implemented decoding logic for geometry fields in search result extraction and field data retrieval. [[1]](diffhunk://#diff-76d170285437d4f74831de67c329c80a2e84cfa1b65375229198810a3e65b2e6R58) [[2]](diffhunk://#diff-76d170285437d4f74831de67c329c80a2e84cfa1b65375229198810a3e65b2e6R352-R368) [[3]](diffhunk://#diff-76d170285437d4f74831de67c329c80a2e84cfa1b65375229198810a3e65b2e6R474-R475)

**Utility function updates**

* Updated `len_of` utility to support length calculation for geometry data fields.
--------- Signed-off-by: Yinwei Li <[email protected]> Signed-off-by: Cai Zhang <[email protected]> Co-authored-by: ZhuXi <[email protected]>
1 parent 626c573 commit 0159b0a

File tree

7 files changed

+75
-33
lines changed

7 files changed

+75
-33
lines changed

pymilvus/bulk_writer/constants.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@
5252
DataType.DOUBLE.name: lambda x: isinstance(x, float),
5353
DataType.VARCHAR.name: lambda x, max_len: isinstance(x, str) and len(x) <= max_len,
5454
DataType.JSON.name: lambda x: isinstance(x, (str, list, dict)),
55+
DataType.GEOMETRY.name: lambda x: isinstance(x, str),
5556
DataType.FLOAT_VECTOR.name: lambda x, dim: float_vector_validator(x, dim),
5657
DataType.BINARY_VECTOR.name: lambda x, dim: binary_vector_validator(x, dim),
5758
DataType.FLOAT16_VECTOR.name: lambda x, dim: float16_vector_validator(x, dim, False),

pymilvus/client/entity_helper.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -499,6 +499,19 @@ def pack_field_value_to_field_data(
499499
% (field_name, "varchar", type(field_value))
500500
+ f" Detail: {e!s}"
501501
) from e
502+
elif field_type == DataType.GEOMETRY:
503+
try:
504+
if field_value is None:
505+
field_data.scalars.geometry_wkt_data.data.extend([])
506+
else:
507+
field_data.scalars.geometry_wkt_data.data.append(
508+
convert_to_str_array(field_value, field_info, CHECK_STR_ARRAY)
509+
)
510+
except (TypeError, ValueError) as e:
511+
raise DataNotMatchException(
512+
message=ExceptionsMessage.FieldDataInconsistent
513+
% (field_name, "geometry", type(field_value))
514+
) from e
502515
elif field_type == DataType.JSON:
503516
try:
504517
if field_value is None:
@@ -589,6 +602,10 @@ def entity_to_field_data(entity: Dict, field_info: Any, num_rows: int) -> schema
589602
field_data.scalars.array_data.data.extend(
590603
entity_to_array_arr(entity_values, field_info)
591604
)
605+
elif entity_type == DataType.GEOMETRY:
606+
field_data.scalars.geometry_wkt_data.data.extend(
607+
entity_to_str_arr(entity_values, field_info, CHECK_STR_ARRAY)
608+
)
592609
else:
593610
raise ParamError(message=f"Unsupported data type: {entity_type}")
594611
except (TypeError, ValueError) as e:
@@ -775,6 +792,11 @@ def assign_scalar(data: List[Any]) -> None:
775792
assign_scalar(data)
776793
return False
777794

795+
if field_data.type == DataType.GEOMETRY:
796+
data = field_data.scalars.geometry_wkt_data.data
797+
assign_scalar(data)
798+
return False
799+
778800
if field_data.type == DataType.JSON:
779801
return True
780802

@@ -894,6 +916,18 @@ def check_append(field_data: Any, row_data: Dict):
894916
row_data[field_data.field_name] = field_data.scalars.string_data.data[index]
895917
return
896918

919+
if (
920+
field_data.type == DataType.GEOMETRY
921+
and len(field_data.scalars.geometry_wkt_data.data) >= index
922+
):
923+
if len(field_data.valid_data) > 0 and field_data.valid_data[index] is False:
924+
entity_row_data[field_data.field_name] = None
925+
return
926+
entity_row_data[field_data.field_name] = field_data.scalars.geometry_wkt_data.data[
927+
index
928+
]
929+
return
930+
897931
if field_data.type == DataType.JSON and len(field_data.scalars.json_data.data) >= index:
898932
if len(field_data.valid_data) > 0 and field_data.valid_data[index] is False:
899933
row_data[field_data.field_name] = None

pymilvus/client/search_result.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ def __init__(
5656
DataType.FLOAT,
5757
DataType.DOUBLE,
5858
DataType.VARCHAR,
59+
DataType.GEOMETRY,
5960
]:
6061
if has_valid:
6162
[
@@ -402,6 +403,15 @@ def _get_fields_by_range(
402403
)
403404
continue
404405

406+
if dtype == DataType.GEOMETRY:
407+
field2data[name] = (
408+
apply_valid_data(
409+
scalars.geometry_wkt_data.data[start:end], field.valid_data, start, end
410+
),
411+
field_meta,
412+
)
413+
continue
414+
405415
if dtype == DataType.JSON:
406416
res = apply_valid_data(
407417
scalars.json_data.data[start:end], field.valid_data, start, end
@@ -521,6 +531,8 @@ def get_field_data(field_data: FieldData):
521531
return field_data.scalars.double_data.data
522532
if field_data.type == DataType.VARCHAR:
523533
return field_data.scalars.string_data.data
534+
if field_data.type == DataType.GEOMETRY:
535+
return field_data.scalars.geometry_wkt_data.data
524536
if field_data.type == DataType.JSON:
525537
return field_data.scalars.json_data.data
526538
if field_data.type == DataType.ARRAY:

pymilvus/client/utils.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,9 @@ def len_of(field_data: Any) -> int:
175175
if field_data.scalars.HasField("array_data"):
176176
return len(field_data.scalars.array_data.data)
177177

178+
if field_data.scalars.HasField("geometry_wkt_data"):
179+
return len(field_data.scalars.geometry_wkt_data.data)
180+
178181
raise MilvusException(message="Unsupported scalar type")
179182

180183
if field_data.HasField("vectors"):

pymilvus/grpc_gen/msg_pb2.py

Lines changed: 22 additions & 26 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)