|
| 1 | +"""SVS-VAMANA compression configuration utilities.""" |
| 2 | + |
| 3 | +from typing import Literal, Optional, TypedDict, cast |
| 4 | + |
| 5 | + |
class SVSConfig(TypedDict, total=False):
    """SVS-VAMANA configuration dictionary.

    Declared with ``total=False``, so type checkers treat every key as
    optional; a given config may legitimately omit keys (``reduce`` in
    particular is absent when no dimensionality reduction is configured).

    Attributes:
        algorithm: Always "svs-vamana"
        datatype: Vector datatype (float16, float32)
        compression: Compression type (LVQ4, LeanVec4x8, etc.)
        reduce: Reduced dimensionality (only for LeanVec)
        graph_max_degree: Max edges per node
        construction_window_size: Build-time candidates
        search_window_size: Query-time candidates
    """

    # Fixed discriminator: the only value the type admits is "svs-vamana".
    algorithm: Literal["svs-vamana"]
    # Typed as plain ``str``; the docstring lists float16/float32 as the
    # expected values but the type does not enforce that.
    datatype: str
    # Compression scheme name, e.g. "LVQ4" or "LeanVec4x8".
    compression: str
    reduce: int  # only for LeanVec
    graph_max_degree: int
    construction_window_size: int
    search_window_size: int
| 26 | + |
| 27 | + |
class CompressionAdvisor:
    """Helper to recommend compression settings based on vector characteristics.

    This class provides utilities to:
    - Recommend optimal SVS-VAMANA configurations based on vector dimensions and priorities
    - Estimate memory savings from compression and dimensionality reduction

    Both utilities are static methods; the class keeps no instance state.

    Examples:
        >>> # Get recommendations for high-dimensional vectors
        >>> config = CompressionAdvisor.recommend(dims=1536, priority="balanced")
        >>> config["compression"]
        'LeanVec4x8'
        >>> config["reduce"]
        768

        >>> # Estimate memory savings
        >>> savings = CompressionAdvisor.estimate_memory_savings(
        ...     compression="LeanVec4x8",
        ...     dims=1536,
        ...     reduce=768
        ... )
        >>> savings
        81.2
    """

    # Vectors with at least this many dimensions get LeanVec settings from
    # recommend(); anything smaller gets LVQ settings.
    HIGH_DIM_THRESHOLD = 1024

    # Compression bit rates (bits per dimension) used by
    # estimate_memory_savings(). In this table every "AxB" scheme is costed
    # at A + B bits.
    COMPRESSION_BITS = {
        "LVQ4": 4,
        "LVQ4x4": 8,
        "LVQ4x8": 12,
        "LVQ8": 8,
        "LeanVec4x8": 12,
        "LeanVec8x8": 16,
    }

    @staticmethod
    def recommend(
        dims: int,
        priority: Literal["speed", "memory", "balanced"] = "balanced",
        datatype: Optional[str] = None,
    ) -> "SVSConfig":
        """Recommend compression settings based on dimensions and priorities.

        Args:
            dims: Vector dimensionality (must be > 0)
            priority: Optimization priority:
                - "memory": Maximize memory savings
                - "speed": Optimize for query speed
                - "balanced": Balance between memory and speed
                Any other value is treated as "balanced".
            datatype: Override datatype (default: float16 for high-dim,
                float32 for low-dim). A falsy value such as "" also selects
                the default.

        Returns:
            dict: Complete SVS-VAMANA configuration including:
                - algorithm: "svs-vamana"
                - datatype: Recommended datatype
                - compression: Compression type
                - reduce: Dimensionality reduction (for LeanVec only)
                - graph_max_degree: Graph connectivity
                - construction_window_size: Build-time candidates
                - search_window_size: Query-time candidates

        Raises:
            ValueError: If dims <= 0

        Examples:
            >>> # High-dimensional embeddings (e.g., OpenAI ada-002)
            >>> config = CompressionAdvisor.recommend(dims=1536, priority="memory")
            >>> config["compression"]
            'LeanVec4x8'
            >>> config["reduce"]
            768

            >>> # Lower-dimensional embeddings
            >>> config = CompressionAdvisor.recommend(dims=384, priority="speed")
            >>> config["compression"]
            'LVQ4x8'
        """
        if dims <= 0:
            raise ValueError(f"dims must be positive, got {dims}")

        # High-dimensional vectors (>= HIGH_DIM_THRESHOLD): LeanVec with
        # dimensionality reduction. Only the reduced size and the search
        # window vary with the priority.
        if dims >= CompressionAdvisor.HIGH_DIM_THRESHOLD:
            if priority == "memory":
                reduced_dims, search_window = dims // 2, 20
            elif priority == "speed":
                # Quarter the dimensions, but never go below 256.
                reduced_dims, search_window = max(256, dims // 4), 40
            else:  # balanced (also the fallback for unrecognised values)
                reduced_dims, search_window = dims // 2, 30

            return cast(
                "SVSConfig",
                {
                    "algorithm": "svs-vamana",
                    "datatype": datatype or "float16",
                    "graph_max_degree": 64,
                    "construction_window_size": 300,
                    "compression": "LeanVec4x8",
                    "reduce": reduced_dims,
                    "search_window_size": search_window,
                },
            )

        # Lower-dimensional vectors: LVQ, no "reduce" key. Only the LVQ
        # variant varies with the priority.
        if priority == "memory":
            compression = "LVQ4"
        elif priority == "speed":
            compression = "LVQ4x8"
        else:  # balanced (also the fallback for unrecognised values)
            compression = "LVQ4x4"

        return cast(
            "SVSConfig",
            {
                "algorithm": "svs-vamana",
                "datatype": datatype or "float32",
                "graph_max_degree": 40,
                "construction_window_size": 250,
                "search_window_size": 20,
                "compression": compression,
            },
        )

    @staticmethod
    def estimate_memory_savings(
        compression: str, dims: int, reduce: Optional[int] = None
    ) -> float:
        """Estimate memory savings percentage from compression.

        Calculates the percentage of memory saved compared to uncompressed
        float32 vectors (32 bits per dimension).

        Args:
            compression: Compression type (e.g., "LVQ4", "LeanVec4x8").
                Names missing from ``COMPRESSION_BITS`` are costed at the
                uncompressed 32 bits per dimension.
            dims: Original vector dimensionality (must be > 0)
            reduce: Reduced dimensionality (for LeanVec compression). ``None``
                or ``0`` means no reduction. When given, it is applied
                regardless of the compression type.

        Returns:
            float: Memory savings percentage (0-100), rounded to one decimal

        Raises:
            ValueError: If dims <= 0, or if reduce is negative or larger
                than dims

        Examples:
            >>> # LeanVec with dimensionality reduction
            >>> CompressionAdvisor.estimate_memory_savings(
            ...     compression="LeanVec4x8",
            ...     dims=1536,
            ...     reduce=768
            ... )
            81.2

            >>> # LVQ without dimensionality reduction
            >>> CompressionAdvisor.estimate_memory_savings(
            ...     compression="LVQ4",
            ...     dims=384
            ... )
            87.5
        """
        # Same guard as recommend(); it also prevents a ZeroDivisionError in
        # the ratio below.
        if dims <= 0:
            raise ValueError(f"dims must be positive, got {dims}")
        # A reduced size outside [0, dims] would push the result outside the
        # documented 0-100 range.
        if reduce is not None and not 0 <= reduce <= dims:
            raise ValueError(f"reduce must be between 0 and dims ({dims}), got {reduce}")

        # Base bits per dimension (float32)
        base_bits = 32

        # Compressed bits per dimension; unknown schemes count as uncompressed
        compression_bits = CompressionAdvisor.COMPRESSION_BITS.get(
            compression, base_bits
        )

        # Account for dimensionality reduction (None/0 keeps the full size)
        effective_dims = reduce or dims

        original_size = dims * base_bits
        compressed_size = effective_dims * compression_bits
        savings = (1 - compressed_size / original_size) * 100

        return round(savings, 1)
0 commit comments