|
1 | 1 | # magda-pdf-semantic-indexer |
2 | 2 |
|
3 | | - |
| 3 | + |
4 | 4 |
|
5 | 5 | A Helm chart for Magda PDF Semantic Indexer |
6 | 6 |
|
@@ -36,14 +36,13 @@ Kubernetes: `>= 1.14.0-0` |
36 | 36 | | defaultSemanticIndexerConfig.overlap | int | `50` | | |
37 | 37 | | defaultSemanticIndexerConfig.overlap | int | `50` | | |
38 | 38 | | embeddingApiURL | string | `"http://magda-embedding-api"` | | |
39 | | -| global | object | `{"image":{},"rollingUpdate":{},"searchEngine":{"defaultDatasetBucket":"magda-datasets","semanticIndexer":{"indexName":null,"indexVersion":null,"knnVectorFieldConfig":{"compressionLevel":null,"dimension":768,"efConstruction":100,"efSearch":100,"encoder":{"clip":false,"name":"sq","type":"fp16"},"m":16,"mode":"in_memory","spaceType":"l2"},"numberOfReplicas":0,"numberOfShards":1}}}` | only for providing appropriate default value for helm lint | |
40 | | -| global.searchEngine.semanticIndexer.knnVectorFieldConfig.compressionLevel | string | `nil` | The compression_level mapping parameter selects a quantization encoder that reduces vector memory consumption by the given factor. | |
| 39 | +| global | object | `{"image":{},"rollingUpdate":{},"searchEngine":{"defaultDatasetBucket":"magda-datasets","semanticIndexer":{"indexName":null,"indexVersion":null,"knnVectorFieldConfig":{"compressionLevel":"32x","dimension":768,"efConstruction":100,"efSearch":100,"m":16,"mode":"on_disk","spaceType":"l2"},"numberOfReplicas":0,"numberOfShards":1}}}` | Only used to provide appropriate default values for `helm lint`. | |
| 40 | +| global.searchEngine.semanticIndexer.knnVectorFieldConfig.compressionLevel | string | `"32x"` | The compression_level mapping parameter selects a quantization encoder that reduces vector memory consumption by the given factor. | |
41 | 41 | | global.searchEngine.semanticIndexer.knnVectorFieldConfig.dimension | int | `768` | Dimension of the embedding vectors. | |
42 | 42 | | global.searchEngine.semanticIndexer.knnVectorFieldConfig.efConstruction | int | `100` | Similar to efSearch but used during index construction. Higher values improve search quality but increase index build time. | |
43 | 43 | | global.searchEngine.semanticIndexer.knnVectorFieldConfig.efSearch | int | `100` | The size of the candidate queue during search. Larger values may improve search quality but increase search latency. | |
44 | | -| global.searchEngine.semanticIndexer.knnVectorFieldConfig.encoder | object | `{"clip":false,"name":"sq","type":"fp16"}` | FAISS Encoder configuration (If compressionLevel is set, encoder will be ignored). | |
45 | 44 | | global.searchEngine.semanticIndexer.knnVectorFieldConfig.m | int | `16` | The maximum number of graph edges per vector. Higher values increase memory usage but may improve search quality. | |
46 | | -| global.searchEngine.semanticIndexer.knnVectorFieldConfig.mode | string | `"in_memory"` | Vector workload mode: `on_disk` or `in_memory`. | |
| 45 | +| global.searchEngine.semanticIndexer.knnVectorFieldConfig.mode | string | `"on_disk"` | Vector workload mode: `on_disk` or `in_memory`. | |
47 | 46 | | image.name | string | `"magda-pdf-semantic-indexer"` | | |
48 | 47 | | minioConfig.defaultDatasetBucket | string | `""` | | |
49 | 48 | | minioConfig.endPoint | string | `"magda-minio"` | | |
|
0 commit comments