@@ -1037,7 +1037,73 @@ def type(self) -> HighlightType:
10371037 return HighlightType .LEXICAL
10381038
10391039
1040- Highlighter = LexicalHighlighter # Use Union[...] to add more highlighter types in the future
class SemanticHighlighter:
    """
    Configuration holder for semantic highlighting of search results.

    Bundles the settings used to find and mark relevant text segments by
    semantic similarity instead of plain keyword matching.
    """

    def __init__(
        self,
        queries: List[str],
        input_fields: List[str],
        *,
        pre_tags: Optional[List[str]] = None,
        post_tags: Optional[List[str]] = None,
        threshold: Optional[float] = None,
        highlight_only: Optional[bool] = None,
        model_deployment_id: Optional[str] = None,
        max_client_batch_size: Optional[int] = None,
    ):
        """
        Record the highlighter settings on the instance as given.

        No validation or copying is performed; every argument is stored
        unchanged under an attribute of the same name.

        Args:
            queries: Search queries to match against the document.
            input_fields: Schema fields whose text should be highlighted.
            pre_tags: HTML tags or strings placed before highlighted text.
            post_tags: HTML tags or strings placed after highlighted text.
            threshold: Minimum confidence score (0.0 to 1.0) required to
                trigger highlighting.
            highlight_only: If True, only the highlighted snippets are
                returned instead of the full text.
            model_deployment_id: ID of the deployed model used for semantic
                inference.
            max_client_batch_size: Upper bound on the number of items
                processed in a single batch.
        """
        # Mandatory settings.
        self.queries = queries
        self.input_fields = input_fields

        # Tag markers wrapped around each highlighted segment.
        self.pre_tags = pre_tags
        self.post_tags = post_tags

        # Optional tuning knobs; None means "not specified".
        self.threshold = threshold
        self.highlight_only = highlight_only
        self.model_deployment_id = model_deployment_id
        self.max_client_batch_size = max_client_batch_size

    @property
    def params(self) -> Dict[str, Any]:
        """
        Return the highlighter settings as a plain dictionary.

        ``queries`` and ``input_fields`` are always present. Each optional
        setting is included only when its value is not None.
        """
        payload: Dict[str, Any] = {}
        payload["queries"] = self.queries
        payload["input_fields"] = self.input_fields

        # Walk the optional settings in a fixed order so the resulting key
        # order is stable; unset (None) entries are left out entirely.
        for key in (
            "pre_tags",
            "post_tags",
            "threshold",
            "highlight_only",
            "model_deployment_id",
            "max_client_batch_size",
        ):
            setting = getattr(self, key)
            if setting is not None:
                payload[key] = setting
        return payload

    @property
    def type(self) -> HighlightType:
        """Return the highlight kind of this highlighter: always SEMANTIC."""
        return HighlightType.SEMANTIC
1105+
# Type alias covering every supported highlighter configuration class;
# extend the Union when a new highlighter type is introduced.
Highlighter = Union[LexicalHighlighter, SemanticHighlighter]
10411107
10421108
10431109def is_valid_insert_data (data : Union [pd .DataFrame , list , dict ]) -> bool :
0 commit comments