Enhance Milvus storage module with advanced indexing options

howethomas · howethomas · commit 678b565fe605 · 2025-04-23T10:56:50.000-04:00
- Updated example_config.yml and README.md to include detailed vector index settings and options for different index types (IVF_FLAT, IVF_SQ8, IVF_PQ, HNSW, FLAT).
- Modified create_collection function in __init__.py to accept and configure index parameters dynamically based on user-defined options.
- Improved logging for index creation to provide better insights into the process.
diff --git a/example_config.yml b/example_config.yml
@@ -55,15 +55,33 @@ storages:
   milvus:
     module: storage.milvus
     options:
+      # Connection settings
       host: "localhost"
       port: "19530"
       collection_name: "vcons"
+      
+      # Embedding settings
       embedding_model: "text-embedding-3-small"
       embedding_dim: 1536
       api_key: "your-openai-api-key"
       organization: ""
+      
+      # Operation settings
       create_collection_if_missing: true
       skip_if_exists: true
+      
+      # Vector index settings (Default: IVF_FLAT with L2 distance)
+      index_type: "IVF_FLAT"  # Options: IVF_FLAT, IVF_SQ8, IVF_PQ, HNSW, FLAT
+      metric_type: "L2"       # Options: L2, IP, COSINE
+      nlist: 128              # For IVF indexes: number of clusters
+      
+      # Advanced HNSW settings (used only if index_type is HNSW)
+      # m: 16                 # Number of edges per node 
+      # ef_construction: 200  # Size of dynamic candidate list during construction
+      
+      # Advanced IVF_PQ settings (used only if index_type is IVF_PQ)
+      # pq_m: 8               # Number of sub-quantizers
+      # pq_nbits: 8           # Bit depth per quantizer
 chains:
   sample_chain:
     links:
diff --git a/server/storage/milvus/README.md b/server/storage/milvus/README.md
@@ -29,17 +29,84 @@ storages:
   milvus:
     module: storage.milvus
     options:
+      # Connection settings
       host: "localhost"                  # Milvus server host
       port: "19530"                      # Milvus server port
       collection_name: "vcons"           # Name of the collection in Milvus
+      
+      # Embedding settings
       embedding_model: "text-embedding-3-small"  # OpenAI embedding model
       embedding_dim: 1536                # Dimensions for the chosen model
       api_key: "your-openai-api-key"     # Your OpenAI API key
       organization: "your-org-id"        # Optional: Your OpenAI organization ID
+      
+      # Operation settings
       create_collection_if_missing: true # Whether to create collection if it doesn't exist
       skip_if_exists: true               # Skip storing vCons that already exist
+      
+      # Vector index settings (optional; the values shown are the defaults)
+      index_type: "IVF_FLAT"             # Vector index type
+      metric_type: "L2"                  # Distance metric type
+      nlist: 128                         # Number of clusters for IVF indexes
 ```
 
+### Vector Index Types
+
+The module supports different vector index types with appropriate parameters:
+
+#### IVF_FLAT (Default)
+Good balance of search accuracy and speed. Vectors are stored uncompressed, so it uses more memory than the quantized IVF variants, but distances within each searched cluster are exact.
+
+```yaml
+index_type: "IVF_FLAT"  
+metric_type: "L2"       # Or "IP" for inner product, or "COSINE"
+nlist: 128              # Number of clusters; more clusters speed up search but can lower recall
+```
+
+#### IVF_SQ8
+Similar to IVF_FLAT but with scalar quantization (8-bit) to reduce memory usage. Good for large datasets.
+
+```yaml
+index_type: "IVF_SQ8"
+metric_type: "L2"
+nlist: 128
+```
+
+#### IVF_PQ
+Product Quantization for maximum memory optimization. Sacrifices some accuracy for much smaller index size.
+
+```yaml
+index_type: "IVF_PQ"
+metric_type: "L2"
+nlist: 128
+pq_m: 8                 # Number of sub-quantizers
+pq_nbits: 8             # Bit depth per quantizer
+```
+
+#### HNSW
+Hierarchical Navigable Small World graph index. Very fast for searching, especially with smaller datasets.
+
+```yaml
+index_type: "HNSW"
+metric_type: "L2"
+m: 16                   # Number of edges per node
+ef_construction: 200    # Size of the dynamic candidate list during construction
+```
+
+#### FLAT
+The simplest index: a brute-force search that compares the query against every stored vector. Most accurate, but slowest on large datasets.
+
+```yaml
+index_type: "FLAT"
+metric_type: "L2"
+```
+
+### Distance Metrics
+
+- `L2`: Euclidean distance (default). Good for most embeddings including OpenAI embeddings.
+- `IP`: Inner product. Higher scores mean more similar vectors; on normalized vectors it ranks results the same way as cosine similarity.
+- `COSINE`: Cosine similarity. Good for measuring the angle between vectors regardless of magnitude.
+
 ## Searching vCons in Milvus
 
 While not part of the storage module itself, you can search vCons in Milvus using the pymilvus client:
diff --git a/server/storage/milvus/__init__.py b/server/storage/milvus/__init__.py
@@ -33,6 +33,11 @@
     "organization": None,  # OpenAI organization ID
     "create_collection_if_missing": False,  # Whether to create collection if it doesn't exist
     "skip_if_exists": True,  # Skip storing vCons that already exist in Milvus
+    "index_type": "IVF_FLAT",  # Vector index type: IVF_FLAT, IVF_SQ8, IVF_PQ, HNSW, ANNOY, etc.
+    "metric_type": "L2",   # Distance metric: L2 (Euclidean), IP (Inner Product), COSINE
+    "nlist": 128,          # Number of clusters for IVF indexes
+    "m": 16,               # HNSW parameter: number of edges per node
+    "ef_construction": 200, # HNSW parameter: size of the dynamic candidate list during construction
 }
 
 def ensure_milvus_connection(host: str, port: str) -> bool:
@@ -266,13 +271,14 @@ def extract_party_id(vcon: dict) -> str:
     logger.debug("No usable party identifier found")
     return "unknown_party"
 
-def create_collection(collection_name: str, embedding_dim: int) -> Union[Collection, None]:
+def create_collection(collection_name: str, embedding_dim: int, opts: dict) -> Union[Collection, None]:
     """
     Create a new Milvus collection for vCons.
     
     Args:
         collection_name: Name for the new collection
         embedding_dim: Dimension of the embedding vectors
+        opts: Configuration options including index parameters
         
     Returns:
         Collection or None: The created collection or None if failed
@@ -301,15 +307,53 @@ def create_collection(collection_name: str, embedding_dim: int) -> Union[Collect
         # Create collection
         collection = Collection(name=collection_name, schema=schema)
         
-        # Create an IVF_FLAT index for fast vector search
+        # Prepare index parameters based on the selected index type
+        index_type = opts.get("index_type", "IVF_FLAT")
+        metric_type = opts.get("metric_type", "L2")
+        
+        # Configure index parameters based on index type
+        if index_type.startswith("IVF"):  # IVF_FLAT, IVF_SQ8, IVF_PQ
+            params = {"nlist": opts.get("nlist", 128)}
+            
+            # Additional params for IVF_PQ
+            if index_type == "IVF_PQ":
+                # For PQ, m is typically set to 8 or 12
+                params["m"] = opts.get("pq_m", 8)
+                # nbits is typically 8
+                params["nbits"] = opts.get("pq_nbits", 8)
+                
+        elif index_type == "HNSW":
+            params = {
+                "M": opts.get("m", 16),  # Number of edges per node
+                "efConstruction": opts.get("ef_construction", 200)  # Size of the dynamic candidate list during construction
+            }
+            
+        elif index_type == "ANNOY":
+            params = {
+                "n_trees": opts.get("n_trees", 50)  # Number of trees for ANNOY
+            }
+            
+        elif index_type == "FLAT":
+            # FLAT index doesn't need additional parameters
+            params = {}
+            
+        else:
+            # Default to IVF_FLAT if index type is not recognized
+            logger.warning(f"Unrecognized index type {index_type}, defaulting to IVF_FLAT")
+            index_type = "IVF_FLAT"
+            params = {"nlist": opts.get("nlist", 128)}
+        
+        # Create the index
         index_params = {
-            "metric_type": "L2",
-            "index_type": "IVF_FLAT",
-            "params": {"nlist": 128}
+            "metric_type": metric_type,
+            "index_type": index_type,
+            "params": params
         }
+        
+        logger.info(f"Creating index of type {index_type} with metric {metric_type}")
         collection.create_index(field_name="embedding", index_params=index_params)
         
-        logger.info(f"Created collection '{collection_name}' successfully")
+        logger.info(f"Created collection '{collection_name}' successfully with {index_type} index")
         return collection
     except Exception as e:
         logger.error(f"Failed to create collection: {e}")
@@ -368,7 +412,7 @@ def save(vcon_uuid: str, opts=default_options) -> None:
     if not utility.has_collection(collection_name):
         if opts["create_collection_if_missing"]:
             logger.info(f"Collection {collection_name} does not exist, creating...")
-            collection = create_collection(collection_name, opts["embedding_dim"])
+            collection = create_collection(collection_name, opts["embedding_dim"], opts)
             if not collection:
                 error_msg = f"Failed to create collection {collection_name}"
                 logger.error(error_msg)