Skip to content

Commit 1cac7f4

Browse files
feat: [2.6]Support semantic-highlighter (#3180)
Signed-off-by: junjie.jiang <junjie.jiang@zilliz.com>
1 parent 88bc083 commit 1cac7f4

File tree

2 files changed

+69
-1
lines changed

2 files changed

+69
-1
lines changed

pymilvus/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@
5151
Function,
5252
FunctionScore,
5353
LexicalHighlighter,
54+
SemanticHighlighter,
5455
StructFieldSchema,
5556
)
5657
from .orm.utility import (
@@ -116,6 +117,7 @@
116117
"Role",
117118
"SearchFuture",
118119
"SearchResult",
120+
"SemanticHighlighter",
119121
"Shard",
120122
"Status",
121123
"StructFieldSchema",

pymilvus/orm/schema.py

Lines changed: 67 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1037,7 +1037,73 @@ def type(self) -> HighlightType:
10371037
return HighlightType.LEXICAL
10381038

10391039

1040-
Highlighter = LexicalHighlighter # Use Union[...] to add more highlighter types in the future
1040+
class SemanticHighlighter:
    """
    Configuration holder for semantic highlighting of search results.

    Instead of plain keyword matching, the highlighted text segments are
    selected by semantic similarity to the supplied queries. The object keeps
    the settings as attributes and exposes them as a parameter dictionary
    (``params``) together with its highlight kind (``type``).
    """

    def __init__(
        self,
        queries: List[str],
        input_fields: List[str],
        *,
        pre_tags: Optional[List[str]] = None,
        post_tags: Optional[List[str]] = None,
        threshold: Optional[float] = None,
        highlight_only: Optional[bool] = None,
        model_deployment_id: Optional[str] = None,
        max_client_batch_size: Optional[int] = None,
    ):
        """
        Store the highlighting settings on the instance without validation.

        Args:
            queries: Search queries the document text is matched against.
            input_fields: Names of the schema fields to highlight.
            pre_tags: Strings (e.g. HTML tags) placed before highlighted text.
            post_tags: Strings (e.g. HTML tags) placed after highlighted text.
            threshold: Minimum confidence score (0.0 to 1.0) for a highlight.
            highlight_only: If True, only highlighted snippets are returned
                rather than the full text.
            model_deployment_id: ID of the deployed model used for inference.
            max_client_batch_size: Cap on the number of items per batch.
        """
        # Mandatory settings.
        self.queries = queries
        self.input_fields = input_fields

        # Optional settings; ``None`` means "not specified".
        self.pre_tags = pre_tags
        self.post_tags = post_tags
        self.threshold = threshold
        self.highlight_only = highlight_only
        self.model_deployment_id = model_deployment_id
        self.max_client_batch_size = max_client_batch_size

    @property
    def params(self) -> Dict[str, Any]:
        """
        Build the parameter dictionary for this highlighter.

        Returns:
            A dict that always contains ``queries`` and ``input_fields``,
            followed by each optional setting whose value is not ``None``.
        """
        result: Dict[str, Any] = {
            "queries": self.queries,
            "input_fields": self.input_fields,
        }

        # Walk the optional settings in a fixed order so the key order of the
        # resulting dict stays stable; unset (None) entries are left out.
        optional_keys = (
            "pre_tags",
            "post_tags",
            "threshold",
            "highlight_only",
            "model_deployment_id",
            "max_client_batch_size",
        )
        for key in optional_keys:
            value = getattr(self, key)
            if value is not None:
                result[key] = value
        return result

    @property
    def type(self) -> HighlightType:
        """Return the highlight kind of this configuration: ``HighlightType.SEMANTIC``."""
        return HighlightType.SEMANTIC
1104+
1105+
1106+
# Type alias accepting either highlighter configuration class defined in this
# module; extend the Union when further highlighter types are added.
Highlighter = Union[LexicalHighlighter, SemanticHighlighter]
10411107

10421108

10431109
def is_valid_insert_data(data: Union[pd.DataFrame, list, dict]) -> bool:

0 commit comments

Comments
 (0)