Skip to content

Commit f528fdc

Browse files
feat(oss-opensearch): Add Scalar Quantization support
1 parent eb2c2ce commit f528fdc

File tree

4 files changed

+132
-17
lines changed

4 files changed

+132
-17
lines changed

vectordb_bench/backend/clients/oss_opensearch/cli.py

Lines changed: 26 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -100,9 +100,31 @@ class OSSOpenSearchTypedDict(TypedDict):
100100
str | None,
101101
click.option(
102102
"--quantization-type",
103-
type=click.Choice(["fp32", "fp16"]),
103+
type=click.Choice(["None", "LuceneSQ", "FaissSQfp16"]),
104104
help="quantization type for vectors (in index)",
105-
default="fp32",
105+
default="None",
106+
required=False,
107+
),
108+
]
109+
110+
confidence_interval: Annotated[
111+
float | None,
112+
click.option(
113+
"--confidence-interval",
114+
type=float,
115+
help="Confidence interval for Lucene SQ (0.0-1.0, optional)",
116+
default=None,
117+
required=False,
118+
),
119+
]
120+
121+
clip: Annotated[
122+
bool,
123+
click.option(
124+
"--clip",
125+
type=bool,
126+
help="Clip vectors to [-65504, 65504] for FAISS FP16",
127+
default=False,
106128
required=False,
107129
),
108130
]
@@ -150,6 +172,8 @@ def OSSOpenSearch(**parameters: Unpack[OSSOpenSearchHNSWTypedDict]):
150172
M=parameters["m"],
151173
engine=OSSOS_Engine(parameters["engine"]),
152174
quantization_type=OSSOpenSearchQuantization(parameters["quantization_type"]),
175+
confidence_interval=parameters["confidence_interval"],
176+
clip=parameters["clip"],
153177
),
154178
**parameters,
155179
)

vectordb_bench/backend/clients/oss_opensearch/config.py

Lines changed: 43 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -51,8 +51,11 @@ class OSSOS_Engine(Enum):
5151

5252

5353
class OSSOpenSearchQuantization(Enum):
54-
fp32 = "fp32"
55-
fp16 = "fp16"
54+
"""In-memory scalar quantization types"""
55+
56+
NONE = "None"
57+
LUCENE_SQ = "LuceneSQ"
58+
FAISS_SQFP16 = "FaissSQfp16"
5659

5760

5861
# Compression level constants for disk-based mode
@@ -98,14 +101,33 @@ class OSSOpenSearchIndexConfig(BaseModel, DBCaseConfig):
98101
cb_threshold: str | None = "50%"
99102
number_of_indexing_clients: int | None = 1
100103
use_routing: bool = False # for label-filter cases
101-
quantization_type: OSSOpenSearchQuantization = OSSOpenSearchQuantization.fp32
104+
quantization_type: OSSOpenSearchQuantization = OSSOpenSearchQuantization.NONE
105+
confidence_interval: float | None = None
106+
clip: bool = False
102107
replication_type: str | None = "DOCUMENT"
103108
knn_derived_source_enabled: bool = False
104109
memory_optimized_search: bool = False
105110
on_disk: bool = False
106111
compression_level: str = CompressionLevel.LEVEL_32X
107112
oversample_factor: float = 1.0
108113

114+
@validator("quantization_type", pre=True, always=True)
115+
def validate_quantization_type(cls, value: any):
116+
"""Convert string values to enum"""
117+
if not value:
118+
return OSSOpenSearchQuantization.NONE
119+
120+
if isinstance(value, OSSOpenSearchQuantization):
121+
return value
122+
123+
mapping = {
124+
"None": OSSOpenSearchQuantization.NONE,
125+
"LuceneSQ": OSSOpenSearchQuantization.LUCENE_SQ,
126+
"FaissSQfp16": OSSOpenSearchQuantization.FAISS_SQFP16,
127+
}
128+
129+
return mapping.get(value, OSSOpenSearchQuantization.NONE)
130+
109131
@root_validator
110132
def validate_engine_name(cls, values: dict):
111133
"""Map engine_name string from UI to engine enum"""
@@ -130,6 +152,8 @@ def __eq__(self, obj: any):
130152
and self.number_of_segments == obj.number_of_segments
131153
and self.use_routing == obj.use_routing
132154
and self.quantization_type == obj.quantization_type
155+
and self.confidence_interval == obj.confidence_interval
156+
and self.clip == obj.clip
133157
and self.replication_type == obj.replication_type
134158
and self.knn_derived_source_enabled == obj.knn_derived_source_enabled
135159
and self.memory_optimized_search == obj.memory_optimized_search
@@ -149,6 +173,8 @@ def __hash__(self) -> int:
149173
self.number_of_segments,
150174
self.use_routing,
151175
self.quantization_type,
176+
self.confidence_interval,
177+
self.clip,
152178
self.replication_type,
153179
self.knn_derived_source_enabled,
154180
self.memory_optimized_search,
@@ -173,7 +199,7 @@ def parse_metric(self) -> str:
173199
@property
174200
def use_quant(self) -> bool:
175201
"""Only use in-memory quantization when NOT in disk mode"""
176-
return not self.on_disk and self.quantization_type is not OSSOpenSearchQuantization.fp32
202+
return not self.on_disk and self.quantization_type != OSSOpenSearchQuantization.NONE
177203

178204
@property
179205
def resolved_engine(self) -> OSSOS_Engine:
@@ -207,11 +233,20 @@ def index_param(self) -> dict:
207233
},
208234
}
209235

236+
# Add encoder for in-memory quantization
210237
if self.use_quant:
211-
method_config["parameters"]["encoder"] = {
212-
"name": "sq",
213-
"parameters": {"type": self.quantization_type.value},
214-
}
238+
encoder_config = {"name": "sq"}
239+
240+
if self.quantization_type == OSSOpenSearchQuantization.LUCENE_SQ:
241+
# Lucene SQ: optional confidence_interval
242+
if self.confidence_interval is not None:
243+
encoder_config["parameters"] = {"confidence_interval": self.confidence_interval}
244+
245+
elif self.quantization_type == OSSOpenSearchQuantization.FAISS_SQFP16 and self.clip:
246+
# FAISS SQfp16: optional clip parameter
247+
encoder_config["parameters"] = {"type": "fp16", "clip": True}
248+
249+
method_config["parameters"]["encoder"] = encoder_config
215250

216251
return method_config
217252

vectordb_bench/frontend/config/dbCaseConfigs.py

Lines changed: 61 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1973,17 +1973,65 @@ class CaseConfigInput(BaseModel):
19731973
isDisplayed=lambda config: config.get(CaseConfigParamType.on_disk, False) == False,
19741974
)
19751975

1976-
CaseConfigParamInput_QUANTIZATION_TYPE_OSSOpensearch = CaseConfigInput(
1976+
CaseConfigParamInput_QUANTIZATION_TYPE_LUCENE_OSSOpensearch = CaseConfigInput(
19771977
label=CaseConfigParamType.quantizationType,
19781978
displayLabel="Quantization Type",
1979-
inputHelp="Scalar quantization type for in-memory vectors",
1979+
inputHelp="Scalar quantization for Lucene engine",
19801980
inputType=InputType.Option,
19811981
inputConfig={
1982-
"options": ["fp32", "fp16"],
1983-
"default": "fp32",
1982+
"options": ["None", "LuceneSQ"],
1983+
"default": "None",
19841984
},
1985-
isDisplayed=lambda config: config.get(CaseConfigParamType.on_disk, False) == False,
1985+
isDisplayed=lambda config: (
1986+
not config.get(CaseConfigParamType.on_disk, False) and config.get(CaseConfigParamType.engine_name) == "lucene"
1987+
),
1988+
)
1989+
1990+
CaseConfigParamInput_QUANTIZATION_TYPE_FAISS_OSSOpensearch = CaseConfigInput(
1991+
label=CaseConfigParamType.quantizationType,
1992+
displayLabel="Quantization Type",
1993+
inputHelp="Scalar quantization for FAISS engine",
1994+
inputType=InputType.Option,
1995+
inputConfig={
1996+
"options": ["None", "FaissSQfp16"],
1997+
"default": "None",
1998+
},
1999+
isDisplayed=lambda config: (
2000+
not config.get(CaseConfigParamType.on_disk, False) and config.get(CaseConfigParamType.engine_name) == "faiss"
2001+
),
2002+
)
2003+
2004+
CaseConfigParamInput_CONFIDENCE_INTERVAL_OSSOpensearch = CaseConfigInput(
2005+
label=CaseConfigParamType.confidence_interval,
2006+
displayLabel="Confidence Interval",
2007+
inputHelp="Quantile range for Lucene SQ (0.9-1.0, 0 for dynamic, or empty for auto)",
2008+
inputType=InputType.Float,
2009+
inputConfig={
2010+
"min": 0.0,
2011+
"max": 1.0,
2012+
"value": None,
2013+
"step": 0.1,
2014+
},
2015+
isDisplayed=lambda config: (
2016+
not config.get(CaseConfigParamType.on_disk, False)
2017+
and config.get(CaseConfigParamType.quantizationType) == "LuceneSQ"
2018+
),
2019+
)
2020+
2021+
CaseConfigParamInput_CLIP_OSSOpensearch = CaseConfigInput(
2022+
label=CaseConfigParamType.clip,
2023+
displayLabel="Clip Vectors",
2024+
inputHelp="Clip out-of-range values to [-65504, 65504] for FP16",
2025+
inputType=InputType.Bool,
2026+
inputConfig={
2027+
"value": False,
2028+
},
2029+
isDisplayed=lambda config: (
2030+
not config.get(CaseConfigParamType.on_disk, False)
2031+
and config.get(CaseConfigParamType.quantizationType) == "FaissSQfp16"
2032+
),
19862033
)
2034+
19872035
MilvusLoadConfig = [
19882036
CaseConfigParamInput_IndexType,
19892037
CaseConfigParamInput_M,
@@ -2448,7 +2496,10 @@ class CaseConfigInput(BaseModel):
24482496
CaseConfigParamInput_METRIC_TYPE_NAME_AWSOpensearch,
24492497
CaseConfigParamInput_M_AWSOpensearch,
24502498
CaseConfigParamInput_EFConstruction_AWSOpensearch,
2451-
CaseConfigParamInput_QUANTIZATION_TYPE_OSSOpensearch,
2499+
CaseConfigParamInput_QUANTIZATION_TYPE_LUCENE_OSSOpensearch,
2500+
CaseConfigParamInput_QUANTIZATION_TYPE_FAISS_OSSOpensearch,
2501+
CaseConfigParamInput_CONFIDENCE_INTERVAL_OSSOpensearch,
2502+
CaseConfigParamInput_CLIP_OSSOpensearch,
24522503
CaseConfigParamInput_REFRESH_INTERVAL_AWSOpensearch,
24532504
CaseConfigParamInput_NUMBER_OF_SHARDS_AWSOpensearch,
24542505
CaseConfigParamInput_NUMBER_OF_REPLICAS_AWSOpensearch,
@@ -2468,7 +2519,10 @@ class CaseConfigInput(BaseModel):
24682519
CaseConfigParamInput_METRIC_TYPE_NAME_AWSOpensearch,
24692520
CaseConfigParamInput_M_AWSOpensearch,
24702521
CaseConfigParamInput_EFConstruction_AWSOpensearch,
2471-
CaseConfigParamInput_QUANTIZATION_TYPE_OSSOpensearch,
2522+
CaseConfigParamInput_QUANTIZATION_TYPE_LUCENE_OSSOpensearch,
2523+
CaseConfigParamInput_QUANTIZATION_TYPE_FAISS_OSSOpensearch,
2524+
CaseConfigParamInput_CONFIDENCE_INTERVAL_OSSOpensearch,
2525+
CaseConfigParamInput_CLIP_OSSOpensearch,
24722526
CaseConfigParamInput_REFRESH_INTERVAL_AWSOpensearch,
24732527
CaseConfigParamInput_NUMBER_OF_SHARDS_AWSOpensearch,
24742528
CaseConfigParamInput_NUMBER_OF_REPLICAS_AWSOpensearch,

vectordb_bench/models.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -134,6 +134,8 @@ class CaseConfigParamType(Enum):
134134
on_disk = "on_disk"
135135
compression_level = "compression_level"
136136
oversample_factor = "oversample_factor"
137+
confidence_interval = "confidence_interval"
138+
clip = "clip"
137139

138140
# CockroachDB parameters
139141
min_partition_size = "min_partition_size"

0 commit comments

Comments
 (0)