|
61 | 61 | import org.springframework.util.StringUtils; |
62 | 62 |
|
63 | 63 | /** |
64 | | - * Uses the "vector_store" table to store the Spring AI vector data. The table and the |
65 | | - * vector index will be auto-created if not available. |
| 64 | + * PostgreSQL-based vector store implementation using the pgvector extension. |
| 65 | + * |
| 66 | + * <p> |
| 67 | + * The store uses a database table to persist the vector embeddings along with their |
| 68 | + * associated document content and metadata. By default, it uses the "vector_store" table |
| 69 | + * in the "public" schema, but this can be configured. |
| 70 | + * </p> |
| 71 | + * |
| 72 | + * <p> |
| 73 | + * Features: |
| 74 | + * </p> |
| 75 | + * <ul> |
| 76 | + * <li>Automatic schema initialization with configurable table and index creation</li> |
| 77 | + * <li>Support for different distance metrics: Cosine, Euclidean, and Negative Inner Product</li> |
| 78 | + * <li>Flexible indexing options: HNSW (default), IVFFlat, or exact search (no index)</li> |
| 79 | + * <li>Metadata filtering using JSON path expressions</li> |
| 80 | + * <li>Configurable similarity thresholds for search results</li> |
| 81 | + * <li>Batch processing support with configurable batch sizes</li> |
| 82 | + * </ul> |
| 83 | + * |
| 84 | + * <p> |
| 85 | + * Basic usage example: |
| 86 | + * </p> |
| 87 | + * <pre>{@code |
| 88 | + * PgVectorStore vectorStore = PgVectorStore.builder() |
| 89 | + * .jdbcTemplate(jdbcTemplate) |
| 90 | + * .embeddingModel(embeddingModel) |
| 91 | + * .dimensions(1536) // Optional: defaults to model dimensions or 1536 |
| 92 | + * .distanceType(PgDistanceType.COSINE_DISTANCE) |
| 93 | + * .indexType(PgIndexType.HNSW) |
| 94 | + * .build(); |
| 95 | + * |
| 96 | + * // Add documents |
| 97 | + * vectorStore.add(List.of( |
| 98 | + * new Document("content1", Map.of("key1", "value1")), |
| 99 | + * new Document("content2", Map.of("key2", "value2")) |
| 100 | + * )); |
| 101 | + * |
| 102 | + * // Search with filters |
| 103 | + * List<Document> results = vectorStore.similaritySearch( |
| 104 | + * SearchRequest.query("search text") |
| 105 | + * .withTopK(5) |
| 106 | + * .withSimilarityThreshold(0.7) |
| 107 | + * .withFilterExpression("key1 == 'value1'") |
| 108 | + * ); |
| 109 | + * }</pre> |
| 110 | + * |
| 111 | + * <p> |
| 112 | + * Advanced configuration example: |
| 113 | + * </p> |
| 114 | + * <pre>{@code |
| 115 | + * PgVectorStore vectorStore = PgVectorStore.builder() |
| 116 | + * .jdbcTemplate(jdbcTemplate) |
| 117 | + * .embeddingModel(embeddingModel) |
| 118 | + * .schemaName("custom_schema") |
| 119 | + * .vectorTableName("custom_vectors") |
| 120 | + * .distanceType(PgDistanceType.NEGATIVE_INNER_PRODUCT) |
| 121 | + * .removeExistingVectorStoreTable(true) |
| 122 | + * .initializeSchema(true) |
| 123 | + * .maxDocumentBatchSize(1000) |
| 124 | + * .build(); |
| 125 | + * }</pre> |
| 126 | + * |
| 127 | + * <p> |
| 128 | + * Database Requirements: |
| 129 | + * </p> |
| 130 | + * <ul> |
| 131 | + * <li>PostgreSQL with pgvector extension installed</li> |
| 132 | + * <li>Required extensions: vector, hstore, uuid-ossp</li> |
| 133 | + * <li>Table schema with id (uuid), content (text), metadata (json), and embedding |
| 134 | + * (vector) columns</li> |
| 135 | + * </ul> |
| 136 | + * |
| 137 | + * <p> |
| 138 | + * Distance Types: |
| 139 | + * </p> |
| 140 | + * <ul> |
| 141 | + * <li>COSINE_DISTANCE: Default, suitable for most use cases</li> |
| 142 | + * <li>EUCLIDEAN_DISTANCE: L2 distance between vectors</li> |
| 143 | + * <li>NEGATIVE_INNER_PRODUCT: Best performance for normalized vectors (e.g., OpenAI |
| 144 | + * embeddings)</li> |
| 145 | + * </ul> |
| 146 | + * |
| 147 | + * <p> |
| 148 | + * Index Types: |
| 149 | + * </p> |
| 150 | + * <ul> |
| 151 | + * <li>HNSW: Default, better query performance but slower builds and more memory</li> |
| 152 | + * <li>IVFFLAT: Faster builds, less memory, but lower query performance</li> |
| 153 | + * <li>NONE: Exact search without indexing</li> |
| 154 | + * </ul> |
66 | 155 | * |
67 | 156 | * @author Christian Tzolov |
68 | 157 | * @author Josh Long |
|
0 commit comments