diff --git a/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/vectordbs.adoc b/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/vectordbs.adoc
index 41449d5086c..cfe478c0734 100644
--- a/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/vectordbs.adoc
+++ b/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/vectordbs.adoc
@@ -236,6 +236,104 @@ TokenCountBatchingStrategy strategy = new TokenCountBatchingStrategy(
 );
 ----
 
+=== Working with Auto-Truncation
+
+Some embedding models, such as Vertex AI text embedding, support an `auto_truncate` feature.
+When enabled, this feature allows the embedding model to silently truncate text that exceeds the maximum input size and continue processing.
+When disabled, the model throws an explicit error for input exceeding the limits.
+
+When using auto-truncation with the batching strategy, you need a different configuration approach to avoid the exceptions that occur when a single document exceeds the configured maximum input token count.
+
+==== Configuration for Auto-Truncation
+
+When enabling auto-truncation, configure your batching strategy with a much higher input token count than the model's actual maximum.
+This prevents the batching strategy from throwing exceptions and allows the embedding model to handle truncation internally.
+
+Here is an example configuration that enables auto-truncation on the Vertex AI embedding model, defines a matching custom `BatchingStrategy`, and wires both into a `PgVectorStore`:
+
+[source,java]
+----
+@Configuration
+public class AutoTruncationEmbeddingConfig {
+
+    @Bean
+    public VertexAiTextEmbeddingModel vertexAiEmbeddingModel(
+            VertexAiEmbeddingConnectionDetails connectionDetails) {
+
+        VertexAiTextEmbeddingOptions options = VertexAiTextEmbeddingOptions.builder()
+            .model(VertexAiTextEmbeddingOptions.DEFAULT_MODEL_NAME)
+            .autoTruncate(true) // Enable auto-truncation
+            .build();
+
+        return new VertexAiTextEmbeddingModel(connectionDetails, options);
+    }
+
+    @Bean
+    public BatchingStrategy batchingStrategy() {
+        // Set a much higher token count than the model actually supports
+        // (e.g., 132,900 when Vertex AI supports only up to 20,000)
+        return new TokenCountBatchingStrategy(
+            EncodingType.CL100K_BASE,
+            132900, // Artificially high limit
+            0.1     // 10% reserve
+        );
+    }
+
+    @Bean
+    public VectorStore vectorStore(JdbcTemplate jdbcTemplate, EmbeddingModel embeddingModel, BatchingStrategy batchingStrategy) {
+        return PgVectorStore.builder(jdbcTemplate, embeddingModel)
+            .batchingStrategy(batchingStrategy)
+            // other properties omitted here
+            .build();
+    }
+}
+----
+
+In this configuration:
+
+1. The embedding model has auto-truncation enabled, allowing it to handle oversized inputs gracefully
+2. The batching strategy uses an artificially high token limit (132,900) that's much larger than the actual model limit (20,000)
+3. The vector store uses the configured embedding model and the custom `BatchingStrategy` bean
+
+==== Why This Works
+
+This approach works because:
+
+1. The `TokenCountBatchingStrategy` checks if any single document exceeds the configured maximum and throws an `IllegalArgumentException` if it does (see the sketch after this list)
+2. By setting a very high limit in the batching strategy, we ensure that this check never fails
+3. Documents or batches that exceed the actual model limit are handled by the embedding model's auto-truncation feature
+4. The embedding model silently truncates excess tokens and continues processing
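+
+To see what the pre-check in step 1 measures, you can estimate a document's token count with the same JTokkit encoding that `TokenCountBatchingStrategy` uses.
+The snippet below is an illustrative sketch only (the sample text is an arbitrary placeholder), not part of the Spring AI API:
+
+[source,java]
+----
+// Estimate how many tokens a document contributes to the batching pre-check
+EncodingRegistry registry = Encodings.newDefaultEncodingRegistry();
+Encoding encoding = registry.getEncoding(EncodingType.CL100K_BASE);
+
+String text = "This is a test document. ".repeat(5000);
+int tokenCount = encoding.countTokens(text);
+
+// Against the real Vertex AI limit (about 20,000 tokens) a count like this would
+// fail the pre-check; against the inflated limit (132,900) it passes through to
+// the model, which auto-truncates the input instead.
+System.out.println("Estimated tokens: " + tokenCount);
+----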
+
+==== Best Practices
+
+When using auto-truncation:
+
+- Set the batching strategy's maximum input token count to be at least 5-10x larger than the model's actual limit
+- Monitor your logs for truncation warnings from the embedding model
+- Consider the implications of silent truncation on your embedding quality
+- Test with sample documents to ensure truncated embeddings still meet your requirements
+
+CAUTION: While auto-truncation prevents errors, it can result in incomplete embeddings. Important information at the end of long documents may be lost.
+Consider document chunking strategies, such as the one sketched below, if preserving all content is critical.
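+
+If preserving all content matters more than keeping a single vector per document, a common alternative is to split oversized documents into smaller chunks before adding them to the vector store, so that each chunk is embedded in full.
+The following is a minimal sketch using Spring AI's `TokenTextSplitter` with its default settings; `veryLongText` and `vectorStore` are placeholders, and the right splitter settings depend on your model and data:
+
+[source,java]
+----
+// Split an oversized document into chunks that fit the model's real input limit,
+// then add the chunks instead of the original document.
+TokenTextSplitter splitter = new TokenTextSplitter();
+
+Document largeDocument = new Document(veryLongText);
+List<Document> chunks = splitter.apply(List.of(largeDocument));
+
+vectorStore.add(chunks);
+----
+
+Each chunk is embedded separately, so no content is silently dropped, at the cost of storing several vectors per original document.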
+
+==== Spring Boot Auto-Configuration
+
+If you're using Spring Boot auto-configuration, you must provide a custom `BatchingStrategy` bean to override the default one that comes with Spring AI:
+
+[source,java]
+----
+@Bean
+public BatchingStrategy customBatchingStrategy() {
+    // This bean will override the default BatchingStrategy
+    return new TokenCountBatchingStrategy(
+        EncodingType.CL100K_BASE,
+        132900, // Much higher than model's actual limit
+        0.1
+    );
+}
+----
+
+The presence of this bean in your application context will automatically replace the default batching strategy used by all vector stores.
+
 === Custom Implementation
 
 While `TokenCountBatchingStrategy` provides a robust default implementation, you can customize the batching strategy to fit your specific needs.
diff --git a/vector-stores/spring-ai-pgvector-store/pom.xml b/vector-stores/spring-ai-pgvector-store/pom.xml
index 878cd0a8cc5..44451b53d0c 100644
--- a/vector-stores/spring-ai-pgvector-store/pom.xml
+++ b/vector-stores/spring-ai-pgvector-store/pom.xml
@@ -77,6 +77,13 @@
 			<scope>test</scope>
 		</dependency>
 
+		<dependency>
+			<groupId>org.springframework.ai</groupId>
+			<artifactId>spring-ai-vertex-ai-embedding</artifactId>
+			<version>${project.parent.version}</version>
+			<scope>test</scope>
+		</dependency>
+
 		<dependency>
 			<groupId>org.springframework.ai</groupId>
diff --git a/vector-stores/spring-ai-pgvector-store/src/test/java/org/springframework/ai/vectorstore/pgvector/PgVectorStoreAutoTruncationIT.java b/vector-stores/spring-ai-pgvector-store/src/test/java/org/springframework/ai/vectorstore/pgvector/PgVectorStoreAutoTruncationIT.java
new file mode 100644
index 00000000000..a35106b1d07
--- /dev/null
+++ b/vector-stores/spring-ai-pgvector-store/src/test/java/org/springframework/ai/vectorstore/pgvector/PgVectorStoreAutoTruncationIT.java
@@ -0,0 +1,231 @@
+/*
+ * Copyright 2025-2025 the original author or authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.springframework.ai.vectorstore.pgvector;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import javax.sql.DataSource;
+
+import com.knuddels.jtokkit.api.EncodingType;
+import com.zaxxer.hikari.HikariDataSource;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable;
+import org.testcontainers.containers.PostgreSQLContainer;
+import org.testcontainers.junit.jupiter.Container;
+import org.testcontainers.junit.jupiter.Testcontainers;
+
+import org.springframework.ai.document.Document;
+import org.springframework.ai.embedding.BatchingStrategy;
+import org.springframework.ai.embedding.EmbeddingModel;
+import org.springframework.ai.embedding.TokenCountBatchingStrategy;
+import org.springframework.ai.vectorstore.SearchRequest;
+import org.springframework.ai.vectorstore.VectorStore;
+import org.springframework.ai.vertexai.embedding.VertexAiEmbeddingConnectionDetails;
+import org.springframework.ai.vertexai.embedding.text.VertexAiTextEmbeddingModel;
+import org.springframework.ai.vertexai.embedding.text.VertexAiTextEmbeddingOptions;
+import org.springframework.beans.factory.annotation.Value;
+import org.springframework.boot.SpringBootConfiguration;
+import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
+import org.springframework.boot.autoconfigure.jdbc.DataSourceAutoConfiguration;
+import org.springframework.boot.autoconfigure.jdbc.DataSourceProperties;
+import org.springframework.boot.context.properties.ConfigurationProperties;
+import org.springframework.boot.test.context.runner.ApplicationContextRunner;
+import org.springframework.context.ApplicationContext;
+import org.springframework.context.annotation.Bean;
+import org.springframework.context.annotation.Primary;
+import org.springframework.jdbc.core.JdbcTemplate;
+
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+
+/**
+ * Integration tests for PgVectorStore with auto-truncation enabled. Tests the behavior
+ * when using artificially high token limits with Vertex AI's auto-truncation feature.
+ *
+ * @author Soby Chacko
+ */
+@Testcontainers
+@EnabledIfEnvironmentVariable(named = "VERTEX_AI_GEMINI_PROJECT_ID", matches = ".*")
+@EnabledIfEnvironmentVariable(named = "VERTEX_AI_GEMINI_LOCATION", matches = ".*")
+public class PgVectorStoreAutoTruncationIT {
+
+	private static final int ARTIFICIAL_TOKEN_LIMIT = 132_900;
+
+	@Container
+	@SuppressWarnings("resource")
+	static PostgreSQLContainer<?> postgresContainer = new PostgreSQLContainer<>(PgVectorImage.DEFAULT_IMAGE)
+		.withUsername("postgres")
+		.withPassword("postgres");
+
+	private final ApplicationContextRunner contextRunner = new ApplicationContextRunner()
+		.withUserConfiguration(PgVectorStoreAutoTruncationIT.TestApplication.class)
+		.withPropertyValues("test.spring.ai.vectorstore.pgvector.distanceType=COSINE_DISTANCE",
+
+				// JdbcTemplate configuration
+				String.format("app.datasource.url=jdbc:postgresql://%s:%d/%s", postgresContainer.getHost(),
+						postgresContainer.getMappedPort(5432), "postgres"),
+				"app.datasource.username=postgres", "app.datasource.password=postgres",
+				"app.datasource.type=com.zaxxer.hikari.HikariDataSource");
+
+	private static void dropTable(ApplicationContext context) {
+		JdbcTemplate jdbcTemplate = context.getBean(JdbcTemplate.class);
+		jdbcTemplate.execute("DROP TABLE IF EXISTS vector_store");
+	}
+
+	@Test
+	public void testAutoTruncationWithLargeDocument() {
+		this.contextRunner.run(context -> {
+			VectorStore vectorStore = context.getBean(VectorStore.class);
+
+			// Test with a document that exceeds normal token limits but is within our
+			// artificially high limit
+			String largeContent = "This is a test document. ".repeat(5000); // ~25,000
+																			// tokens
+			Document largeDocument = new Document(largeContent);
+			largeDocument.getMetadata().put("test", "auto-truncation");
+
+			// This should not throw an exception due to our high token limit in
+			// BatchingStrategy
+			assertDoesNotThrow(() -> vectorStore.add(List.of(largeDocument)));
+
+			// Verify the document was stored
+			List<Document> results = vectorStore
+				.similaritySearch(SearchRequest.builder().query("test document").topK(1).build());
+
+			assertThat(results).hasSize(1);
+			Document resultDoc = results.get(0);
+			assertThat(resultDoc.getMetadata()).containsEntry("test", "auto-truncation");
+
+			// Test with multiple large documents to ensure batching still works
+			List<Document> largeDocs = new ArrayList<>();
+			for (int i = 0; i < 5; i++) {
+				Document doc = new Document("Large content " + i + " ".repeat(4000));
+				doc.getMetadata().put("batch", String.valueOf(i));
+				largeDocs.add(doc);
+			}
+
+			assertDoesNotThrow(() -> vectorStore.add(largeDocs));
+
+			// Verify all documents were processed
+			List<Document> batchResults = vectorStore
+				.similaritySearch(SearchRequest.builder().query("Large content").topK(5).build());
+
+			assertThat(batchResults).hasSizeGreaterThanOrEqualTo(5);
+
+			// Clean up
+			vectorStore.delete(List.of(largeDocument.getId()));
+			largeDocs.forEach(doc -> vectorStore.delete(List.of(doc.getId())));
+
+			dropTable(context);
+		});
+	}
+
+	@Test
+	public void testExceedingArtificialLimit() {
+		this.contextRunner.run(context -> {
+			BatchingStrategy batchingStrategy = context.getBean(BatchingStrategy.class);
+
+			// Create a document that exceeds even our artificially high limit
+			String massiveContent = "word ".repeat(150000); // ~150,000 tokens (exceeds
+															// 132,900)
+			Document massiveDocument = new Document(massiveContent);
+
+			// This should throw an exception as it exceeds our configured limit
+			assertThrows(IllegalArgumentException.class, () -> {
batchingStrategy.batch(List.of(massiveDocument)); + }); + + dropTable(context); + }); + } + + @SpringBootConfiguration + @EnableAutoConfiguration(exclude = { DataSourceAutoConfiguration.class }) + public static class TestApplication { + + @Value("${test.spring.ai.vectorstore.pgvector.distanceType}") + PgVectorStore.PgDistanceType distanceType; + + @Value("${test.spring.ai.vectorstore.pgvector.initializeSchema:true}") + boolean initializeSchema; + + @Value("${test.spring.ai.vectorstore.pgvector.idType:UUID}") + PgVectorStore.PgIdType idType; + + @Bean + public VectorStore vectorStore(JdbcTemplate jdbcTemplate, EmbeddingModel embeddingModel, + BatchingStrategy batchingStrategy) { + return PgVectorStore.builder(jdbcTemplate, embeddingModel) + .dimensions(PgVectorStore.INVALID_EMBEDDING_DIMENSION) + .batchingStrategy(batchingStrategy) + .idType(this.idType) + .distanceType(this.distanceType) + .initializeSchema(this.initializeSchema) + .indexType(PgVectorStore.PgIndexType.HNSW) + .removeExistingVectorStoreTable(true) + .build(); + } + + @Bean + public JdbcTemplate myJdbcTemplate(DataSource dataSource) { + return new JdbcTemplate(dataSource); + } + + @Bean + @Primary + @ConfigurationProperties("app.datasource") + public DataSourceProperties dataSourceProperties() { + return new DataSourceProperties(); + } + + @Bean + public HikariDataSource dataSource(DataSourceProperties dataSourceProperties) { + return dataSourceProperties.initializeDataSourceBuilder().type(HikariDataSource.class).build(); + } + + @Bean + public VertexAiTextEmbeddingModel vertexAiEmbeddingModel(VertexAiEmbeddingConnectionDetails connectionDetails) { + VertexAiTextEmbeddingOptions options = VertexAiTextEmbeddingOptions.builder() + .model(VertexAiTextEmbeddingOptions.DEFAULT_MODEL_NAME) + // Although this might be the default in Vertex, we are explicitly setting + // this to true to ensure + // that auto truncate is turned on as this is crucial for the + // verifications in this test suite. + .autoTruncate(true) + .build(); + + return new VertexAiTextEmbeddingModel(connectionDetails, options); + } + + @Bean + public VertexAiEmbeddingConnectionDetails connectionDetails() { + return VertexAiEmbeddingConnectionDetails.builder() + .projectId(System.getenv("VERTEX_AI_GEMINI_PROJECT_ID")) + .location(System.getenv("VERTEX_AI_GEMINI_LOCATION")) + .build(); + } + + @Bean + BatchingStrategy pgVectorStoreBatchingStrategy() { + return new TokenCountBatchingStrategy(EncodingType.CL100K_BASE, ARTIFICIAL_TOKEN_LIMIT, 0.1); + } + + } + +}