diff --git a/spring-ai-model/src/main/java/org/springframework/ai/converter/BeanOutputConverter.java b/spring-ai-model/src/main/java/org/springframework/ai/converter/BeanOutputConverter.java
index 176780ebe51..889392f92ba 100644
--- a/spring-ai-model/src/main/java/org/springframework/ai/converter/BeanOutputConverter.java
+++ b/spring-ai-model/src/main/java/org/springframework/ai/converter/BeanOutputConverter.java
@@ -76,12 +76,15 @@ public class BeanOutputConverter<T> implements StructuredOutputConverter<T> {
 	/** Holds the generated JSON schema for the target type. */
 	private String jsonSchema;
 
+	/** The text cleaner used to preprocess LLM responses before parsing. */
+	private final ResponseTextCleaner textCleaner;
+
 	/**
 	 * Constructor to initialize with the target type's class.
 	 * @param clazz The target type's class.
 	 */
 	public BeanOutputConverter(Class<T> clazz) {
-		this(ParameterizedTypeReference.forType(clazz));
+		this(clazz, null, null);
 	}
 
 	/**
@@ -91,7 +94,18 @@ public BeanOutputConverter(Class<T> clazz) {
 	 * @param objectMapper Custom object mapper for JSON operations. endings.
 	 */
 	public BeanOutputConverter(Class<T> clazz, ObjectMapper objectMapper) {
-		this(ParameterizedTypeReference.forType(clazz), objectMapper);
+		this(clazz, objectMapper, null);
+	}
+
+	/**
+	 * Constructor to initialize with the target type's class, a custom object mapper, and
+	 * a custom text cleaner.
+	 * @param clazz The target type's class.
+	 * @param objectMapper Custom object mapper for JSON operations.
+	 * @param textCleaner Custom text cleaner for preprocessing responses.
+	 */
+	public BeanOutputConverter(Class<T> clazz, ObjectMapper objectMapper, ResponseTextCleaner textCleaner) {
+		this(ParameterizedTypeReference.forType(clazz), objectMapper, textCleaner);
 	}
 
 	/**
@@ -99,7 +113,7 @@ public BeanOutputConverter(Class<T> clazz, ObjectMapper objectMapper) {
 	 * @param typeRef The target class type reference.
 	 */
 	public BeanOutputConverter(ParameterizedTypeReference<T> typeRef) {
-		this(typeRef.getType(), null);
+		this(typeRef, null, null);
 	}
 
 	/**
@@ -110,7 +124,19 @@ public BeanOutputConverter(ParameterizedTypeReference<T> typeRef) {
 	 * @param objectMapper Custom object mapper for JSON operations. endings.
 	 */
 	public BeanOutputConverter(ParameterizedTypeReference<T> typeRef, ObjectMapper objectMapper) {
-		this(typeRef.getType(), objectMapper);
+		this(typeRef, objectMapper, null);
+	}
+
+	/**
+	 * Constructor to initialize with the target class type reference, a custom object
+	 * mapper, and a custom text cleaner.
+	 * @param typeRef The target class type reference.
+	 * @param objectMapper Custom object mapper for JSON operations.
+	 * @param textCleaner Custom text cleaner for preprocessing responses.
+	 */
+	public BeanOutputConverter(ParameterizedTypeReference<T> typeRef, ObjectMapper objectMapper,
+			ResponseTextCleaner textCleaner) {
+		this(typeRef.getType(), objectMapper, textCleaner);
 	}
 
 	/**
@@ -119,14 +145,42 @@ public BeanOutputConverter(ParameterizedTypeReference<T> typeRef, ObjectMapper o
 	 * platform.
 	 * @param type The target class type.
 	 * @param objectMapper Custom object mapper for JSON operations. endings.
+	 * @param textCleaner Custom text cleaner for preprocessing responses.
 	 */
-	private BeanOutputConverter(Type type, ObjectMapper objectMapper) {
+	private BeanOutputConverter(Type type, ObjectMapper objectMapper, ResponseTextCleaner textCleaner) {
 		Objects.requireNonNull(type, "Type cannot be null;");
 		this.type = type;
 		this.objectMapper = objectMapper != null ? objectMapper : getObjectMapper();
+		this.textCleaner = textCleaner != null ? textCleaner : createDefaultTextCleaner();
 		generateSchema();
 	}
 
+	/**
+	 * Creates the default text cleaner that handles common response formats from various
+	 * AI models.
+	 * <p>
+	 * The default cleaner includes:
+	 * <ul>
+	 * <li>{@link ThinkingTagCleaner} - Removes thinking tags from models like Amazon Nova
+	 * and Qwen. For models that don't generate thinking tags, this has minimal
+	 * performance impact due to fast-path optimization.</li>
+	 * <li>{@link MarkdownCodeBlockCleaner} - Removes markdown code block formatting.</li>
+	 * <li>{@link WhitespaceCleaner} - Trims whitespace.</li>
+	 * </ul>
+	 * <p>
+	 * To customize the cleaning behavior, provide a custom {@link ResponseTextCleaner}
+	 * via the constructor.
+	 * @return a composite text cleaner with default cleaning strategies
+	 */
+	private static ResponseTextCleaner createDefaultTextCleaner() {
+		return CompositeResponseTextCleaner.builder()
+			.addCleaner(new WhitespaceCleaner())
+			.addCleaner(new ThinkingTagCleaner())
+			.addCleaner(new MarkdownCodeBlockCleaner())
+			.addCleaner(new WhitespaceCleaner()) // Final trim after all cleanups
+			.build();
+	}
+
 	/**
 	 * Generates the JSON schema for the target type.
 	 */
@@ -166,26 +220,9 @@ private void generateSchema() {
 	@Override
 	public T convert(@NonNull String text) {
 		try {
-			// Remove leading and trailing whitespace
-			text = text.trim();
-
-			// Check for and remove triple backticks and "json" identifier
-			if (text.startsWith("```") && text.endsWith("```")) {
-				// Remove the first line if it contains "```json"
-				String[] lines = text.split("\n", 2);
-				if (lines[0].trim().equalsIgnoreCase("```json")) {
-					text = lines.length > 1 ? lines[1] : "";
-				}
-				else {
-					text = text.substring(3); // Remove leading ```
-				}
-
-				// Remove trailing ```
-				text = text.substring(0, text.length() - 3);
-
-				// Trim again to remove any potential whitespace
-				text = text.trim();
-			}
+			// Clean the text using the configured text cleaner
+			text = this.textCleaner.clean(text);
+
 			return (T) this.objectMapper.readValue(text, this.objectMapper.constructType(this.type));
 		}
 		catch (JsonProcessingException e) {
diff --git a/spring-ai-model/src/main/java/org/springframework/ai/converter/CompositeResponseTextCleaner.java b/spring-ai-model/src/main/java/org/springframework/ai/converter/CompositeResponseTextCleaner.java
new file mode 100644
index 00000000000..d1def873f82
--- /dev/null
+++ b/spring-ai-model/src/main/java/org/springframework/ai/converter/CompositeResponseTextCleaner.java
@@ -0,0 +1,108 @@
+/*
+ * Copyright 2023-2024 the original author or authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.springframework.ai.converter;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.springframework.util.Assert;
+
+/**
+ * A composite {@link ResponseTextCleaner} that applies multiple cleaners in sequence.
+ * This allows for a flexible pipeline of text cleaning operations.
+ *
+ * @author liugddx
+ * @since 1.1.0
+ */
+public class CompositeResponseTextCleaner implements ResponseTextCleaner {
+
+	private final List<ResponseTextCleaner> cleaners;
+
+	/**
+	 * Creates a composite cleaner from an immutable snapshot of the given cleaners.
+	 * @param cleaners the cleaners to apply in order; no element may be null
+	 */
+	public CompositeResponseTextCleaner(List<ResponseTextCleaner> cleaners) {
+		Assert.notNull(cleaners, "cleaners cannot be null");
+		this.cleaners = List.copyOf(cleaners);
+	}
+
+	/**
+	 * Creates a composite cleaner with no cleaners. Text will be returned unchanged.
+	 */
+	public CompositeResponseTextCleaner() {
+		this(new ArrayList<>());
+	}
+
+	/**
+	 * Creates a composite cleaner with the given cleaners.
+	 * @param cleaners the cleaners to apply in order
+	 */
+	public CompositeResponseTextCleaner(ResponseTextCleaner... cleaners) {
+		this(Arrays.asList(cleaners));
+	}
+
+	@Override
+	public String clean(String text) {
+		String result = text;
+		for (ResponseTextCleaner cleaner : this.cleaners) {
+			result = cleaner.clean(result);
+		}
+		return result;
+	}
+
+	/**
+	 * Creates a builder for constructing a composite cleaner.
+	 * @return a new builder instance
+	 */
+	public static Builder builder() {
+		return new Builder();
+	}
+
+	/**
+	 * Builder for {@link CompositeResponseTextCleaner}.
+	 */
+	public static final class Builder {
+
+		private final List<ResponseTextCleaner> cleaners = new ArrayList<>();
+
+		private Builder() {
+		}
+
+		/**
+		 * Add a cleaner to the pipeline.
+		 * @param cleaner the cleaner to add
+		 * @return this builder
+		 */
+		public Builder addCleaner(ResponseTextCleaner cleaner) {
+			Assert.notNull(cleaner, "cleaner cannot be null");
+			this.cleaners.add(cleaner);
+			return this;
+		}
+
+		/**
+		 * Build the composite cleaner.
+		 * @return a new composite cleaner instance
+		 */
+		public CompositeResponseTextCleaner build() {
+			return new CompositeResponseTextCleaner(this.cleaners);
+		}
+
+	}
+
+}
diff --git a/spring-ai-model/src/main/java/org/springframework/ai/converter/MarkdownCodeBlockCleaner.java b/spring-ai-model/src/main/java/org/springframework/ai/converter/MarkdownCodeBlockCleaner.java
new file mode 100644
index 00000000000..cd81634fc55
--- /dev/null
+++ b/spring-ai-model/src/main/java/org/springframework/ai/converter/MarkdownCodeBlockCleaner.java
@@ -0,0 +1,73 @@
+/*
+ * Copyright 2023-2024 the original author or authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.springframework.ai.converter;
+
+/**
+ * A {@link ResponseTextCleaner} that removes markdown code block formatting from LLM
+ * responses. This cleaner handles:
+ * <ul>
+ * <li>{@code ```json ... ```}</li>
+ * <li>{@code ``` ... ```}</li>
+ * </ul>
+ * Fences are only removed when they enclose the entire trimmed response.
+ *
+ * @author liugddx
+ * @since 1.1.0
+ */
+public class MarkdownCodeBlockCleaner implements ResponseTextCleaner {
+
+	private static final String FENCE = "```";
+
+	/**
+	 * Strips a markdown code fence that wraps the whole response.
+	 * @param text the raw response text, may be {@code null}
+	 * @return the unfenced, trimmed text; {@code null} or empty input is returned
+	 * unchanged, and text that is not fully fenced is only trimmed
+	 */
+	@Override
+	public String clean(String text) {
+		if (text == null || text.isEmpty()) {
+			return text;
+		}
+
+		text = text.trim();
+		if (!text.startsWith(FENCE) || !text.endsWith(FENCE)) {
+			return text;
+		}
+
+		// Multi-line block: the opening fence line, including an optional language
+		// identifier such as "json", ends at the first line break and is dropped whole.
+		// Single-line block (e.g. ```{"a":1}```): there is no separate fence line, so
+		// only the backticks are removed; discarding the "first line" there would throw
+		// away the payload itself.
+		int firstLineBreak = text.indexOf('\n');
+		if (firstLineBreak >= 0) {
+			text = text.substring(firstLineBreak + 1);
+		}
+		else {
+			text = text.substring(FENCE.length());
+		}
+
+		// The closing fence may already be gone when the input was a lone fence
+		if (text.endsWith(FENCE)) {
+			text = text.substring(0, text.length() - FENCE.length());
+		}
+
+		return text.trim();
+	}
+
+}
diff --git a/spring-ai-model/src/main/java/org/springframework/ai/converter/ResponseTextCleaner.java b/spring-ai-model/src/main/java/org/springframework/ai/converter/ResponseTextCleaner.java
new file mode 100644
index 00000000000..866b495b4f4
--- /dev/null
+++ b/spring-ai-model/src/main/java/org/springframework/ai/converter/ResponseTextCleaner.java
@@ -0,0 +1,37 @@
+/*
+ * Copyright 2023-2024 the original author or authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.springframework.ai.converter;
+
+/**
+ * Strategy interface for cleaning LLM response text before parsing. Different
+ * implementations can handle various response formats and patterns from different AI
+ * models.
+ *
+ * @author liugddx
+ * @since 1.1.0
+ */
+@FunctionalInterface
+public interface ResponseTextCleaner {
+
+	/**
+	 * Clean the given text by removing unwanted patterns, tags, or formatting.
+	 * @param text the raw text from LLM response
+	 * @return the cleaned text ready for parsing
+	 */
+	String clean(String text);
+
+}
diff --git a/spring-ai-model/src/main/java/org/springframework/ai/converter/ThinkingTagCleaner.java b/spring-ai-model/src/main/java/org/springframework/ai/converter/ThinkingTagCleaner.java
new file mode 100644
index 00000000000..11892b48151
--- /dev/null
+++ b/spring-ai-model/src/main/java/org/springframework/ai/converter/ThinkingTagCleaner.java
@@ -0,0 +1,192 @@
+/*
+ * Copyright 2023-2024 the original author or authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.springframework.ai.converter;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.regex.Pattern;
+
+import org.springframework.util.Assert;
+
+/**
+ * A {@link ResponseTextCleaner} that removes thinking tags from LLM responses. This
+ * cleaner supports multiple tag patterns to handle different AI models:
+ * <ul>
+ * <li>Amazon Nova: {@code <thinking>...</thinking>}</li>
+ * <li>Qwen models: {@code <think>...</think>}</li>
+ * <li>DeepSeek models: various thinking patterns</li>
+ * <li>Claude models: thinking blocks in different formats</li>
+ * </ul>
+ * <p>
+ * Performance: This cleaner includes fast-path optimization. For responses without
+ * thinking tags (most models), it performs a quick character check and returns
+ * immediately, making it safe to use as a default cleaner even for non-thinking models.
+ * The shortcut is only taken when the default patterns are the sole patterns in use,
+ * because custom patterns may match text that contains neither {@code <} nor a backtick.
+ *
+ * @author liugddx
+ * @since 1.1.0
+ */
+public class ThinkingTagCleaner implements ResponseTextCleaner {
+
+	/**
+	 * Default thinking tag patterns used by common AI models.
+	 */
+	private static final List<Pattern> DEFAULT_PATTERNS = Arrays.asList(
+			// Amazon Nova: <thinking>...</thinking>
+			Pattern.compile("(?s)<thinking>.*?</thinking>\\s*", Pattern.CASE_INSENSITIVE),
+			// Qwen models: <think>...</think>
+			Pattern.compile("(?s)<think>.*?</think>\\s*", Pattern.CASE_INSENSITIVE),
+			// Alternative XML-style tags
+			Pattern.compile("(?s)<reasoning>.*?</reasoning>\\s*", Pattern.CASE_INSENSITIVE),
+			// Markdown style thinking blocks
+			Pattern.compile("(?s)```thinking.*?```\\s*", Pattern.CASE_INSENSITIVE),
+			// Some models use comment-style
+			Pattern.compile("(?s)<!--\\s*thinking:.*?-->\\s*", Pattern.CASE_INSENSITIVE));
+
+	private final List<Pattern> patterns;
+
+	/**
+	 * Whether {@link #patterns} holds exactly the default patterns, all of which need a
+	 * {@code <} or a backtick to match; only then is the fast path in
+	 * {@link #clean(String)} sound.
+	 */
+	private final boolean defaultPatternsOnly;
+
+	/**
+	 * Creates a cleaner with default thinking tag patterns.
+	 */
+	public ThinkingTagCleaner() {
+		this(DEFAULT_PATTERNS);
+	}
+
+	/**
+	 * Creates a cleaner with custom patterns.
+	 * @param patterns the list of regex patterns to match thinking tags
+	 */
+	public ThinkingTagCleaner(List<Pattern> patterns) {
+		Assert.notNull(patterns, "patterns cannot be null");
+		Assert.notEmpty(patterns, "patterns cannot be empty");
+		this.patterns = new ArrayList<>(patterns);
+		this.defaultPatternsOnly = DEFAULT_PATTERNS.equals(this.patterns);
+	}
+
+	/**
+	 * Creates a cleaner with custom pattern strings.
+	 * @param patternStrings the list of regex pattern strings to match thinking tags
+	 */
+	public ThinkingTagCleaner(String... patternStrings) {
+		Assert.notNull(patternStrings, "patternStrings cannot be null");
+		Assert.notEmpty(patternStrings, "patternStrings cannot be empty");
+		this.patterns = new ArrayList<>();
+		for (String patternString : patternStrings) {
+			this.patterns.add(Pattern.compile(patternString, Pattern.CASE_INSENSITIVE));
+		}
+		this.defaultPatternsOnly = false;
+	}
+
+	@Override
+	public String clean(String text) {
+		if (text == null || text.isEmpty()) {
+			return text;
+		}
+
+		// Fast path: every default pattern needs a '<' or a backtick to match. Custom
+		// patterns carry no such guarantee, so they are always evaluated.
+		if (this.defaultPatternsOnly && text.indexOf('<') < 0 && text.indexOf('`') < 0) {
+			return text;
+		}
+
+		// Apply every pattern in turn, since multiple tag types might coexist
+		String result = text;
+		for (Pattern pattern : this.patterns) {
+			result = pattern.matcher(result).replaceAll("");
+		}
+		return result;
+	}
+
+	/**
+	 * Creates a builder for constructing a thinking tag cleaner.
+	 * @return a new builder instance
+	 */
+	public static Builder builder() {
+		return new Builder();
+	}
+
+	/**
+	 * Builder for {@link ThinkingTagCleaner}.
+	 */
+	public static final class Builder {
+
+		private final List<Pattern> customPatterns = new ArrayList<>();
+
+		private boolean useDefaultPatterns = true;
+
+		private Builder() {
+		}
+
+		/**
+		 * Disable default patterns. Only custom patterns added via
+		 * {@link #addPattern(String)} or {@link #addPattern(Pattern)} will be used,
+		 * regardless of whether they are added before or after this call.
+		 * @return this builder
+		 */
+		public Builder withoutDefaultPatterns() {
+			this.useDefaultPatterns = false;
+			return this;
+		}
+
+		/**
+		 * Add a custom pattern string. It is compiled case-insensitively.
+		 * @param patternString the regex pattern string
+		 * @return this builder
+		 */
+		public Builder addPattern(String patternString) {
+			Assert.hasText(patternString, "patternString cannot be empty");
+			return addPattern(Pattern.compile(patternString, Pattern.CASE_INSENSITIVE));
+		}
+
+		/**
+		 * Add a custom pattern.
+		 * @param pattern the regex pattern
+		 * @return this builder
+		 */
+		public Builder addPattern(Pattern pattern) {
+			Assert.notNull(pattern, "pattern cannot be null");
+			this.customPatterns.add(pattern);
+			return this;
+		}
+
+		/**
+		 * Build the thinking tag cleaner.
+		 * @return a new thinking tag cleaner instance
+		 * @throws IllegalArgumentException if default patterns were disabled and no
+		 * custom pattern was added, since a cleaner needs at least one pattern
+		 */
+		public ThinkingTagCleaner build() {
+			List<Pattern> effectivePatterns = new ArrayList<>();
+			if (this.useDefaultPatterns) {
+				effectivePatterns.addAll(DEFAULT_PATTERNS);
+			}
+			effectivePatterns.addAll(this.customPatterns);
+			return new ThinkingTagCleaner(effectivePatterns);
+		}
+
+	}
+
+}
diff --git a/spring-ai-model/src/main/java/org/springframework/ai/converter/WhitespaceCleaner.java b/spring-ai-model/src/main/java/org/springframework/ai/converter/WhitespaceCleaner.java
new file mode 100644
index 00000000000..1c52a6c0421
--- /dev/null
+++ b/spring-ai-model/src/main/java/org/springframework/ai/converter/WhitespaceCleaner.java
@@ -0,0 +1,32 @@
+/*
+ * Copyright 2023-2024 the original author or authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.springframework.ai.converter;
+
+/**
+ * A {@link ResponseTextCleaner} that trims leading and trailing whitespace from text.
+ *
+ * @author liugddx
+ * @since 1.1.0
+ */
+public class WhitespaceCleaner implements ResponseTextCleaner {
+
+	@Override
+	public String clean(String text) {
+		return text != null ? text.trim() : text;
+	}
+
+}
diff --git a/spring-ai-model/src/test/java/org/springframework/ai/converter/BeanOutputConverterTest.java b/spring-ai-model/src/test/java/org/springframework/ai/converter/BeanOutputConverterTest.java
index 498d0782045..9066aa2c814 100644
--- a/spring-ai-model/src/test/java/org/springframework/ai/converter/BeanOutputConverterTest.java
+++ b/spring-ai-model/src/test/java/org/springframework/ai/converter/BeanOutputConverterTest.java
@@ -238,6 +238,131 @@ void convertTypeReferenceArrayWithJsonAnnotations() {
 			assertThat(testClass.get(0).getSomeString()).isEqualTo("some value");
 		}
 
+		@Test
+		void convertWithThinkingTags() {
+			var converter = new BeanOutputConverter<>(TestClass.class);
+			String textWithThinkingTags = "<thinking>This is my reasoning process...</thinking>{ \"someString\": \"some value\" }";
+			var testClass = converter.convert(textWithThinkingTags);
+			assertThat(testClass.getSomeString()).isEqualTo("some value");
+		}
+
+		@Test
+		void convertWithThinkingTagsMultiline() {
+			var converter = new BeanOutputConverter<>(TestClass.class);
+			String textWithThinkingTags = """
+					<thinking>
+					This is my reasoning process
+					spanning multiple lines
+					</thinking>
+					{ "someString": "some value" }
+					""";
+			var testClass = converter.convert(textWithThinkingTags);
+			assertThat(testClass.getSomeString()).isEqualTo("some value");
+		}
+
+		@Test
+		void convertWithThinkingTagsAndMarkdownCodeBlock() {
+			var converter = new BeanOutputConverter<>(TestClass.class);
+			String textWithThinkingTags = """
+					<thinking>This is my reasoning process...</thinking>
+					```json
+					{ "someString": "some value" }
+					```
+					""";
+			var testClass = converter.convert(textWithThinkingTags);
+			assertThat(testClass.getSomeString()).isEqualTo("some value");
+		}
+
+		@Test
+		void convertWithMultipleThinkingTags() {
+			var converter = new BeanOutputConverter<>(TestClass.class);
+			String textWithThinkingTags = "<thinking>First thought</thinking><thinking>Second thought</thinking>{ \"someString\": \"some value\" }";
+			var testClass = converter.convert(textWithThinkingTags);
+			assertThat(testClass.getSomeString()).isEqualTo("some value");
+		}
+
+		@Test
+		void convertWithQwenThinkTags() {
+			// Test Qwen model format: <think>...</think>
+			var converter = new BeanOutputConverter<>(TestClass.class);
+			String textWithThinkTags = "<think>Let me analyze this...</think>{ \"someString\": \"qwen test\" }";
+			var testClass = converter.convert(textWithThinkTags);
+			assertThat(testClass.getSomeString()).isEqualTo("qwen test");
+		}
+
+		@Test
+		void convertWithQwenThinkTagsMultiline() {
+			var converter = new BeanOutputConverter<>(TestClass.class);
+			String textWithThinkTags = """
+					<think>
+					Analyzing the request step by step
+					First, I need to understand the schema
+					Then generate the JSON
+					</think>
+					{ "someString": "qwen multiline" }
+					""";
+			var testClass = converter.convert(textWithThinkTags);
+			assertThat(testClass.getSomeString()).isEqualTo("qwen multiline");
+		}
+
+		@Test
+		void convertWithMixedThinkingAndThinkTags() {
+			// Test mixed format from different models
+			var converter = new BeanOutputConverter<>(TestClass.class);
+			String textWithMixedTags = "<thinking>Nova reasoning</thinking><think>Qwen analysis</think>{ \"someString\": \"mixed test\" }";
+			var testClass = converter.convert(textWithMixedTags);
+			assertThat(testClass.getSomeString()).isEqualTo("mixed test");
+		}
+
+		@Test
+		void convertWithReasoningTags() {
+			// Test alternative reasoning tags
+			var converter = new BeanOutputConverter<>(TestClass.class);
+			String textWithReasoningTags = "<reasoning>Internal reasoning process</reasoning>{ \"someString\": \"reasoning test\" }";
+			var testClass = converter.convert(textWithReasoningTags);
+			assertThat(testClass.getSomeString()).isEqualTo("reasoning test");
+		}
+
+		@Test
+		void convertWithMarkdownThinkingBlock() {
+			// Test markdown-style thinking block
+			var converter = new BeanOutputConverter<>(TestClass.class);
+			String textWithMarkdownThinking = """
+					```thinking
+					This is a markdown-style thinking block
+					Used by some models
+					```
+					{ "someString": "markdown thinking" }
+					""";
+			var testClass = converter.convert(textWithMarkdownThinking);
+			assertThat(testClass.getSomeString()).isEqualTo("markdown thinking");
+		}
+
+		@Test
+		void convertWithCaseInsensitiveTags() {
+			// Test case insensitive tag matching
+			var converter = new BeanOutputConverter<>(TestClass.class);
+			String textWithUpperCaseTags = "<THINKING>UPPERCASE THINKING</THINKING>{ \"someString\": \"case test\" }";
+			var testClass = converter.convert(textWithUpperCaseTags);
+			assertThat(testClass.getSomeString()).isEqualTo("case test");
+		}
+
+		@Test
+		void convertWithComplexNestedStructure() {
+			// Test complex scenario with multiple formats combined
+			var converter = new BeanOutputConverter<>(TestClass.class);
+			String complexText = """
+					<thinking>Nova model reasoning</thinking>
+					<think>Qwen model analysis</think>
+
+					```json
+					{ "someString": "complex test" }
+					```
+					""";
+			var testClass = converter.convert(complexText);
+			assertThat(testClass.getSomeString()).isEqualTo("complex test");
+		}
+
 	}
 
 	// @checkstyle:off RegexpSinglelineJavaCheck
diff --git a/spring-ai-model/src/test/java/org/springframework/ai/converter/CompositeResponseTextCleanerTest.java b/spring-ai-model/src/test/java/org/springframework/ai/converter/CompositeResponseTextCleanerTest.java
new file mode 100644
index 00000000000..7cf0ebc54c1
--- /dev/null
+++ b/spring-ai-model/src/test/java/org/springframework/ai/converter/CompositeResponseTextCleanerTest.java
@@ -0,0 +1,105 @@
+/*
+ * Copyright 2023-2024 the original author or authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.springframework.ai.converter;
+
+import org.junit.jupiter.api.Test;
+
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.assertj.core.api.Assertions.assertThatThrownBy;
+
+/**
+ * Unit tests for {@link CompositeResponseTextCleaner}.
+ *
+ * @author liugddx
+ */
+class CompositeResponseTextCleanerTest {
+
+	@Test
+	void shouldApplyCleanersInOrder() {
+		var cleaner = CompositeResponseTextCleaner.builder()
+			.addCleaner(text -> text.replace("A", "B"))
+			.addCleaner(text -> text.replace("B", "C"))
+			.build();
+
+		String result = cleaner.clean("AAA");
+		assertThat(result).isEqualTo("CCC");
+	}
+
+	@Test
+	void shouldWorkWithSingleCleaner() {
+		var cleaner = new CompositeResponseTextCleaner(text -> text.trim());
+		String result = cleaner.clean("  content  ");
+		assertThat(result).isEqualTo("content");
+	}
+
+	@Test
+	void shouldWorkWithMultipleCleaners() {
+		var cleaner = new CompositeResponseTextCleaner(new WhitespaceCleaner(), new ThinkingTagCleaner(),
+				new MarkdownCodeBlockCleaner());
+
+		String input = """
+				<thinking>Reasoning</thinking>
+				```json
+				{"key": "value"}
+				```
+				""";
+		String result = cleaner.clean(input);
+		assertThat(result).isEqualTo("{\"key\": \"value\"}");
+	}
+
+	@Test
+	void shouldHandleComplexPipeline() {
+		var cleaner = CompositeResponseTextCleaner.builder()
+			.addCleaner(new WhitespaceCleaner())
+			.addCleaner(new ThinkingTagCleaner())
+			.addCleaner(new MarkdownCodeBlockCleaner())
+			.addCleaner(new WhitespaceCleaner())
+			.build();
+
+		String input = """
+
+				<thinking>Let me analyze this</thinking>
+				<think>Qwen style thinking</think>
+
+				```json
+				{
+					"result": "test"
+				}
+				```
+
+				""";
+
+		String result = cleaner.clean(input);
+		assertThat(result).isEqualTo("{\n\t\"result\": \"test\"\n}");
+	}
+
+	@Test
+	void shouldThrowExceptionWhenCleanersIsNull() {
+		assertThatThrownBy(() -> CompositeResponseTextCleaner.builder().addCleaner(null))
+			.isInstanceOf(IllegalArgumentException.class)
+			.hasMessageContaining("cleaner cannot be null");
+	}
+
+	@Test
+	void shouldHandleEmptyCleanersList() {
+		var cleaner = new CompositeResponseTextCleaner();
+		String input = "test content";
+		String result = cleaner.clean(input);
+		assertThat(result).isEqualTo(input);
+	}
+
+}
diff --git a/spring-ai-model/src/test/java/org/springframework/ai/converter/ThinkingTagCleanerTest.java b/spring-ai-model/src/test/java/org/springframework/ai/converter/ThinkingTagCleanerTest.java
new file mode 100644
index 00000000000..a8d596f337f
--- /dev/null
+++ b/spring-ai-model/src/test/java/org/springframework/ai/converter/ThinkingTagCleanerTest.java
@@ -0,0 +1,152 @@
+/*
+ * Copyright 2023-2024 the original author or authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.springframework.ai.converter;
+
+import org.junit.jupiter.api.Test;
+
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.assertj.core.api.Assertions.assertThatThrownBy;
+
+/**
+ * Unit tests for {@link ThinkingTagCleaner}.
+ *
+ * @author liugddx
+ */
+class ThinkingTagCleanerTest {
+
+	@Test
+	void shouldRemoveAmazonNovaThinkingTags() {
+		var cleaner = new ThinkingTagCleaner();
+		String input = "<thinking>My reasoning process</thinking>Actual content";
+		String result = cleaner.clean(input);
+		assertThat(result).isEqualTo("Actual content");
+	}
+
+	@Test
+	void shouldRemoveQwenThinkTags() {
+		var cleaner = new ThinkingTagCleaner();
+		String input = "<think>Let me think about this</think>Actual content";
+		String result = cleaner.clean(input);
+		assertThat(result).isEqualTo("Actual content");
+	}
+
+	@Test
+	void shouldRemoveReasoningTags() {
+		var cleaner = new ThinkingTagCleaner();
+		String input = "<reasoning>Step by step reasoning</reasoning>Actual content";
+		String result = cleaner.clean(input);
+		assertThat(result).isEqualTo("Actual content");
+	}
+
+	@Test
+	void shouldRemoveMultilineThinkingTags() {
+		var cleaner = new ThinkingTagCleaner();
+		String input = """
+				<thinking>
+				Line 1 of thinking
+				Line 2 of thinking
+				</thinking>
+				Actual content""";
+		String result = cleaner.clean(input);
+		assertThat(result).isEqualTo("Actual content");
+	}
+
+	@Test
+	void shouldRemoveMultipleThinkingTags() {
+		var cleaner = new ThinkingTagCleaner();
+		String input = "<thinking>First</thinking><think>Second</think><reasoning>Third</reasoning>Actual content";
+		String result = cleaner.clean(input);
+		assertThat(result).isEqualTo("Actual content");
+	}
+
+	@Test
+	void shouldBeCaseInsensitive() {
+		var cleaner = new ThinkingTagCleaner();
+		String input = "<THINKING>UPPER CASE</THINKING>Actual content";
+		String result = cleaner.clean(input);
+		assertThat(result).isEqualTo("Actual content");
+	}
+
+	@Test
+	void shouldRemoveMarkdownThinkingBlocks() {
+		var cleaner = new ThinkingTagCleaner();
+		String input = """
+				```thinking
+				This is markdown thinking
+				```
+				Actual content""";
+		String result = cleaner.clean(input);
+		assertThat(result).isEqualTo("Actual content");
+	}
+
+	@Test
+	void shouldHandleEmptyInput() {
+		var cleaner = new ThinkingTagCleaner();
+		assertThat(cleaner.clean("")).isEmpty();
+		assertThat(cleaner.clean(null)).isNull();
+	}
+
+	@Test
+	void shouldHandleContentWithoutTags() {
+		var cleaner = new ThinkingTagCleaner();
+		String input = "Just regular content";
+		String result = cleaner.clean(input);
+		assertThat(result).isEqualTo(input);
+	}
+
+	@Test
+	void shouldSupportCustomPatterns() {
+		var cleaner = new ThinkingTagCleaner("(?s)<custom>.*?</custom>\\s*");
+		String input = "<custom>Custom tag content</custom>Actual content";
+		String result = cleaner.clean(input);
+		assertThat(result).isEqualTo("Actual content");
+	}
+
+	@Test
+	void shouldSupportBuilderWithoutDefaultPatterns() {
+		var cleaner = ThinkingTagCleaner.builder()
+			.withoutDefaultPatterns()
+			.addPattern("(?s)<custom>.*?</custom>\\s*")
+			.build();
+
+		String input = "<thinking>Should remain</thinking><custom>Should be removed</custom>Content";
+		String result = cleaner.clean(input);
+		assertThat(result).isEqualTo("<thinking>Should remain</thinking>Content");
+	}
+
+	@Test
+	void shouldSupportBuilderWithAdditionalPatterns() {
+		var cleaner = ThinkingTagCleaner.builder().addPattern("(?s)<custom>.*?</custom>\\s*").build();
+
+		String input = "<thinking>Removed</thinking><custom>Also removed</custom>Content";
+		String result = cleaner.clean(input);
+		assertThat(result).isEqualTo("Content");
+	}
+
+	@Test
+	void shouldThrowExceptionWhenPatternsAreNull() {
+		assertThatThrownBy(() -> new ThinkingTagCleaner((String[]) null)).isInstanceOf(IllegalArgumentException.class)
+			.hasMessageContaining("patternStrings cannot be null");
+	}
+
+	@Test
+	void shouldThrowExceptionWhenPatternsAreEmpty() {
+		assertThatThrownBy(() -> new ThinkingTagCleaner(new String[0])).isInstanceOf(IllegalArgumentException.class)
+			.hasMessageContaining("patternStrings cannot be empty");
+	}
+
+}