diff --git a/document-readers/markdown-reader/pom.xml b/document-readers/markdown-reader/pom.xml new file mode 100644 index 00000000000..5922ea2b4ed --- /dev/null +++ b/document-readers/markdown-reader/pom.xml @@ -0,0 +1,46 @@ + + + 4.0.0 + + org.springframework.ai + spring-ai + 1.0.0-SNAPSHOT + ../../pom.xml + + spring-ai-markdown-document-reader + jar + Spring AI Document Reader - Markdown + Spring AI Markdown document reader + https://github.com/spring-projects/spring-ai + + + https://github.com/spring-projects/spring-ai + git://github.com/spring-projects/spring-ai.git + git@github.com:spring-projects/spring-ai.git + + + + + org.springframework.ai + spring-ai-core + ${parent.version} + + + + org.commonmark + commonmark + ${commonmark.version} + + + + + org.springframework.boot + spring-boot-starter-test + test + + + + + diff --git a/document-readers/markdown-reader/src/main/java/org/springframework/ai/reader/markdown/MarkdownDocumentReader.java b/document-readers/markdown-reader/src/main/java/org/springframework/ai/reader/markdown/MarkdownDocumentReader.java new file mode 100644 index 00000000000..7ed8aa6b548 --- /dev/null +++ b/document-readers/markdown-reader/src/main/java/org/springframework/ai/reader/markdown/MarkdownDocumentReader.java @@ -0,0 +1,207 @@ +package org.springframework.ai.reader.markdown; + +import org.commonmark.node.*; +import org.commonmark.parser.Parser; +import org.springframework.ai.document.Document; +import org.springframework.ai.document.DocumentReader; +import org.springframework.ai.reader.markdown.config.MarkdownDocumentReaderConfig; +import org.springframework.core.io.DefaultResourceLoader; +import org.springframework.core.io.Resource; + +import java.io.IOException; +import java.io.InputStreamReader; +import java.util.ArrayList; +import java.util.List; + +/** + * Reads the given Markdown resource and groups headers, paragraphs, or text divided by + * horizontal lines (depending on the + * {@link MarkdownDocumentReaderConfig#horizontalRuleCreateDocument} configuration) into + * {@link Document}s. + * + * @author Piotr Olaszewski + */ +public class MarkdownDocumentReader implements DocumentReader { + + /** + * The resource points to the Markdown document. + */ + private final Resource markdownResource; + + /** + * Configuration to a parsing process. + */ + private final MarkdownDocumentReaderConfig config; + + /** + * Markdown parser. + */ + private final Parser parser; + + public MarkdownDocumentReader(String markdownResource) { + this(new DefaultResourceLoader().getResource(markdownResource), MarkdownDocumentReaderConfig.defaultConfig()); + } + + public MarkdownDocumentReader(String markdownResource, MarkdownDocumentReaderConfig config) { + this(new DefaultResourceLoader().getResource(markdownResource), config); + } + + public MarkdownDocumentReader(Resource markdownResource, MarkdownDocumentReaderConfig config) { + this.markdownResource = markdownResource; + this.config = config; + this.parser = Parser.builder().build(); + } + + /** + * Extracts and returns a list of documents from the resource. + * @return List of extracted {@link Document} + */ + @Override + public List get() { + try (var input = markdownResource.getInputStream()) { + Node node = parser.parseReader(new InputStreamReader(input)); + + DocumentVisitor documentVisitor = new DocumentVisitor(config); + node.accept(documentVisitor); + + return documentVisitor.getDocuments(); + } + catch (IOException e) { + throw new RuntimeException(e); + } + } + + /** + * A convenient class for visiting handled nodes in the Markdown document. + */ + static class DocumentVisitor extends AbstractVisitor { + + private final List documents = new ArrayList<>(); + + private final List currentParagraphs = new ArrayList<>(); + + private final MarkdownDocumentReaderConfig config; + + private Document.Builder currentDocumentBuilder; + + public DocumentVisitor(MarkdownDocumentReaderConfig config) { + this.config = config; + } + + @Override + public void visit(org.commonmark.node.Document document) { + currentDocumentBuilder = Document.builder(); + super.visit(document); + } + + @Override + public void visit(Heading heading) { + buildAndFlush(); + super.visit(heading); + } + + @Override + public void visit(ThematicBreak thematicBreak) { + if (config.horizontalRuleCreateDocument) { + buildAndFlush(); + } + super.visit(thematicBreak); + } + + @Override + public void visit(SoftLineBreak softLineBreak) { + translateLineBreakToSpace(); + super.visit(softLineBreak); + } + + @Override + public void visit(HardLineBreak hardLineBreak) { + translateLineBreakToSpace(); + super.visit(hardLineBreak); + } + + @Override + public void visit(ListItem listItem) { + translateLineBreakToSpace(); + super.visit(listItem); + } + + @Override + public void visit(BlockQuote blockQuote) { + if (!config.includeBlockquote) { + buildAndFlush(); + } + + translateLineBreakToSpace(); + currentDocumentBuilder.withMetadata("category", "blockquote"); + super.visit(blockQuote); + } + + @Override + public void visit(Code code) { + currentParagraphs.add(code.getLiteral()); + currentDocumentBuilder.withMetadata("category", "code_inline"); + super.visit(code); + } + + @Override + public void visit(FencedCodeBlock fencedCodeBlock) { + if (!config.includeCodeBlock) { + buildAndFlush(); + } + + translateLineBreakToSpace(); + currentParagraphs.add(fencedCodeBlock.getLiteral()); + currentDocumentBuilder.withMetadata("category", "code_block"); + currentDocumentBuilder.withMetadata("lang", fencedCodeBlock.getInfo()); + + buildAndFlush(); + + super.visit(fencedCodeBlock); + } + + @Override + public void visit(Text text) { + if (text.getParent() instanceof Heading heading) { + currentDocumentBuilder.withMetadata("category", "header_%d".formatted(heading.getLevel())) + .withMetadata("title", text.getLiteral()); + } + else { + currentParagraphs.add(text.getLiteral()); + } + + super.visit(text); + } + + public List getDocuments() { + buildAndFlush(); + + return documents; + } + + private void buildAndFlush() { + if (!currentParagraphs.isEmpty()) { + String content = String.join("", currentParagraphs); + + Document.Builder builder = currentDocumentBuilder.withContent(content); + + config.additionalMetadata.forEach(builder::withMetadata); + + Document document = builder.build(); + + documents.add(document); + + currentParagraphs.clear(); + } + currentDocumentBuilder = Document.builder(); + } + + private void translateLineBreakToSpace() { + if (!currentParagraphs.isEmpty()) { + currentParagraphs.add(" "); + } + } + + } + +} diff --git a/document-readers/markdown-reader/src/main/java/org/springframework/ai/reader/markdown/config/MarkdownDocumentReaderConfig.java b/document-readers/markdown-reader/src/main/java/org/springframework/ai/reader/markdown/config/MarkdownDocumentReaderConfig.java new file mode 100644 index 00000000000..d5ad3ec58ce --- /dev/null +++ b/document-readers/markdown-reader/src/main/java/org/springframework/ai/reader/markdown/config/MarkdownDocumentReaderConfig.java @@ -0,0 +1,123 @@ +package org.springframework.ai.reader.markdown.config; + +import org.springframework.ai.document.Document; +import org.springframework.ai.reader.markdown.MarkdownDocumentReader; +import org.springframework.util.Assert; + +import java.util.HashMap; +import java.util.Map; + +/** + * Common configuration for the {@link MarkdownDocumentReader}. + * + * @author Piotr Olaszewski + */ +public class MarkdownDocumentReaderConfig { + + public final boolean horizontalRuleCreateDocument; + + public final boolean includeCodeBlock; + + public final boolean includeBlockquote; + + public final Map additionalMetadata; + + public MarkdownDocumentReaderConfig(Builder builder) { + horizontalRuleCreateDocument = builder.horizontalRuleCreateDocument; + includeCodeBlock = builder.includeCodeBlock; + includeBlockquote = builder.includeBlockquote; + additionalMetadata = builder.additionalMetadata; + } + + /** + * @return the default configuration + */ + public static MarkdownDocumentReaderConfig defaultConfig() { + return builder().build(); + } + + public static Builder builder() { + return new Builder(); + } + + public static class Builder { + + private boolean horizontalRuleCreateDocument = false; + + private boolean includeCodeBlock = false; + + private boolean includeBlockquote = false; + + private Map additionalMetadata = new HashMap<>(); + + private Builder() { + } + + /** + * Text divided by horizontal lines will create new {@link Document}s. The default + * is {@code false}, meaning text separated by horizontal lines won't create a new + * document. + * @param horizontalRuleCreateDocument flag to determine whether new documents are + * created from text divided by horizontal line + * @return this builder + */ + public Builder withHorizontalRuleCreateDocument(boolean horizontalRuleCreateDocument) { + this.horizontalRuleCreateDocument = horizontalRuleCreateDocument; + return this; + } + + /** + * Whatever to include code blocks in {@link Document}s. The default is + * {@code false}, which means all code blocks are in separate documents. + * @param includeCodeBlock flag to include code block into paragraph document or + * create new with code only + * @return this builder + */ + public Builder withIncludeCodeBlock(boolean includeCodeBlock) { + this.includeCodeBlock = includeCodeBlock; + return this; + } + + /** + * Whatever to include blockquotes in {@link Document}s. The default is + * {@code false}, which means all blockquotes are in separate documents. + * @param includeBlockquote flag to include blockquotes into paragraph document or + * create new with blockquote only + * @return this builder + */ + public Builder withIncludeBlockquote(boolean includeBlockquote) { + this.includeBlockquote = includeBlockquote; + return this; + } + + /** + * Adds this additional metadata to the all built {@link Document}s. + * @return this builder + */ + public Builder withAdditionalMetadata(String key, Object value) { + Assert.notNull(key, "key must not be null"); + Assert.notNull(value, "value must not be null"); + this.additionalMetadata.put(key, value); + return this; + } + + /** + * Adds this additional metadata to the all built {@link Document}s. + * @return this builder + */ + public Builder withAdditionalMetadata(Map additionalMetadata) { + Assert.notNull(additionalMetadata, "additionalMetadata must not be null"); + this.additionalMetadata = additionalMetadata; + return this; + } + + /** + * @return the immutable configuration + */ + public MarkdownDocumentReaderConfig build() { + return new MarkdownDocumentReaderConfig(this); + } + + } + +} diff --git a/document-readers/markdown-reader/src/test/java/org/springframework/ai/reader/markdown/MarkdownDocumentReaderTest.java b/document-readers/markdown-reader/src/test/java/org/springframework/ai/reader/markdown/MarkdownDocumentReaderTest.java new file mode 100644 index 00000000000..739dbbd709b --- /dev/null +++ b/document-readers/markdown-reader/src/test/java/org/springframework/ai/reader/markdown/MarkdownDocumentReaderTest.java @@ -0,0 +1,230 @@ +package org.springframework.ai.reader.markdown; + +import org.junit.jupiter.api.Test; +import org.springframework.ai.document.Document; +import org.springframework.ai.reader.markdown.config.MarkdownDocumentReaderConfig; + +import java.util.List; +import java.util.Map; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.groups.Tuple.tuple; + +/** + * @author Piotr Olaszewski + */ +class MarkdownDocumentReaderTest { + + @Test + void testOnlyHeadersWithParagraphs() { + MarkdownDocumentReader reader = new MarkdownDocumentReader("classpath:/only-headers.md"); + + List documents = reader.get(); + + assertThat(documents).hasSize(4) + .extracting(Document::getMetadata, Document::getContent) + .containsOnly(tuple(Map.of("category", "header_1", "title", "Header 1a"), + "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Curabitur diam eros, laoreet sit amet cursus vitae, varius sed nisi. Cras sit amet quam quis velit commodo porta consectetur id nisi. Phasellus tincidunt pulvinar augue."), + tuple(Map.of("category", "header_1", "title", "Header 1b"), + "Vestibulum ante ipsum primis in faucibus orci luctus et ultrices posuere cubilia curae; Etiam lobortis risus libero, sed sollicitudin risus cursus in. Morbi enim metus, ornare vel lacinia eget, venenatis vel nibh."), + tuple(Map.of("category", "header_2", "title", "Header 2b"), + "Proin vel laoreet leo, sed luctus augue. Sed et ligula commodo, commodo lacus at, consequat turpis. Maecenas eget sapien odio. Maecenas urna lectus, pellentesque in accumsan aliquam, congue eu libero."), + tuple(Map.of("category", "header_2", "title", "Header 2c"), + "Ut rhoncus nec justo a porttitor. Pellentesque auctor pharetra eros, viverra sodales lorem aliquet id. Curabitur semper nisi vel sem interdum suscipit.")); + } + + @Test + void testWithFormatting() { + MarkdownDocumentReader reader = new MarkdownDocumentReader("classpath:/with-formatting.md"); + + List documents = reader.get(); + + assertThat(documents).hasSize(2) + .extracting(Document::getMetadata, Document::getContent) + .containsOnly(tuple(Map.of("category", "header_1", "title", "This is a fancy header name"), + "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Donec tincidunt velit non bibendum gravida. Cras accumsan tincidunt ornare. Donec hendrerit consequat tellus blandit accumsan. Aenean aliquam metus at arcu elementum dignissim."), + tuple(Map.of("category", "header_3", "title", "Header 3"), + "Aenean eu leo eu nibh tristique posuere quis quis massa.")); + } + + @Test + void testDocumentDividedViaHorizontalRules() { + MarkdownDocumentReaderConfig config = MarkdownDocumentReaderConfig.builder() + .withHorizontalRuleCreateDocument(true) + .build(); + + MarkdownDocumentReader reader = new MarkdownDocumentReader("classpath:/horizontal-rules.md", config); + + List documents = reader.get(); + + assertThat(documents).hasSize(7) + .extracting(Document::getMetadata, Document::getContent) + .containsOnly(tuple(Map.of(), + "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Donec tincidunt velit non bibendum gravida."), + tuple(Map.of(), + "Cras accumsan tincidunt ornare. Donec hendrerit consequat tellus blandit accumsan. Aenean aliquam metus at arcu elementum dignissim."), + tuple(Map.of(), + "Nullam nisi dui, egestas nec sem nec, interdum lobortis enim. Pellentesque odio orci, faucibus eu luctus nec, venenatis et magna."), + tuple(Map.of(), + "Vestibulum nec eros non felis fermentum posuere eget ac risus. Curabitur et fringilla massa. Cras facilisis nec nisl sit amet sagittis."), + tuple(Map.of(), + "Aenean eu leo eu nibh tristique posuere quis quis massa. Nullam lacinia luctus sem ut vehicula."), + tuple(Map.of(), + "Aenean quis vulputate mi. Vestibulum ante ipsum primis in faucibus orci luctus et ultrices posuere cubilia curae; Nam tincidunt nunc a tortor tincidunt, nec lobortis diam rhoncus."), + tuple(Map.of(), "Nulla facilisi. Phasellus eget tellus sed nibh ornare interdum eu eu mi.")); + } + + @Test + void testDocumentNotDividedViaHorizontalRulesWhenIsDisabled() { + MarkdownDocumentReaderConfig config = MarkdownDocumentReaderConfig.builder() + .withHorizontalRuleCreateDocument(false) + .build(); + + MarkdownDocumentReader reader = new MarkdownDocumentReader("classpath:/horizontal-rules.md", config); + + List documents = reader.get(); + + assertThat(documents).hasSize(1); + + Document documentsFirst = documents.get(0); + assertThat(documentsFirst.getMetadata()).isEmpty(); + assertThat(documentsFirst.getContent()).startsWith("Lorem ipsum dolor sit amet, consectetur adipiscing elit") + .endsWith("Phasellus eget tellus sed nibh ornare interdum eu eu mi."); + } + + @Test + void testSimpleMarkdownDocumentWithHardAndSoftLineBreaks() { + MarkdownDocumentReader reader = new MarkdownDocumentReader("classpath:/simple.md"); + + List documents = reader.get(); + + assertThat(documents).hasSize(1); + + Document documentsFirst = documents.get(0); + assertThat(documentsFirst.getMetadata()).isEmpty(); + assertThat(documentsFirst.getContent()).isEqualTo( + "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Donec tincidunt velit non bibendum gravida. Cras accumsan tincidunt ornare. Donec hendrerit consequat tellus blandit accumsan. Aenean aliquam metus at arcu elementum dignissim.Nullam nisi dui, egestas nec sem nec, interdum lobortis enim. Pellentesque odio orci, faucibus eu luctus nec, venenatis et magna. Vestibulum nec eros non felis fermentum posuere eget ac risus.Aenean eu leo eu nibh tristique posuere quis quis massa. Nullam lacinia luctus sem ut vehicula."); + } + + @Test + void testCode() { + MarkdownDocumentReaderConfig config = MarkdownDocumentReaderConfig.builder() + .withHorizontalRuleCreateDocument(true) + .build(); + + MarkdownDocumentReader reader = new MarkdownDocumentReader("classpath:/code.md", config); + + List documents = reader.get(); + + assertThat(documents).satisfiesExactly(document -> { + assertThat(document.getMetadata()).isEqualTo(Map.of()); + assertThat(document.getContent()).isEqualTo("This is a Java sample application:"); + }, document -> { + assertThat(document.getMetadata()).isEqualTo(Map.of("lang", "java", "category", "code_block")); + assertThat(document.getContent()).startsWith("package com.example.demo;") + .contains("SpringApplication.run(DemoApplication.class, args);"); + }, document -> { + assertThat(document.getMetadata()).isEqualTo(Map.of("category", "code_inline")); + assertThat(document.getContent()).isEqualTo( + "Markdown also provides the possibility to use inline code formatting throughout the entire sentence."); + }, document -> { + assertThat(document.getMetadata()).isEqualTo(Map.of()); + assertThat(document.getContent()) + .isEqualTo("Another possibility is to set block code without specific highlighting:"); + }, document -> { + assertThat(document.getMetadata()).isEqualTo(Map.of("lang", "", "category", "code_block")); + assertThat(document.getContent()).isEqualTo("./mvnw spring-javaformat:apply\n"); + }); + } + + @Test + void testCodeWhenCodeBlockShouldNotBeSeparatedDocument() { + MarkdownDocumentReaderConfig config = MarkdownDocumentReaderConfig.builder() + .withHorizontalRuleCreateDocument(true) + .withIncludeCodeBlock(true) + .build(); + + MarkdownDocumentReader reader = new MarkdownDocumentReader("classpath:/code.md", config); + + List documents = reader.get(); + + assertThat(documents).satisfiesExactly(document -> { + assertThat(document.getMetadata()).isEqualTo(Map.of("lang", "java", "category", "code_block")); + assertThat(document.getContent()).startsWith("This is a Java sample application: package com.example.demo") + .contains("SpringApplication.run(DemoApplication.class, args);"); + }, document -> { + assertThat(document.getMetadata()).isEqualTo(Map.of("category", "code_inline")); + assertThat(document.getContent()).isEqualTo( + "Markdown also provides the possibility to use inline code formatting throughout the entire sentence."); + }, document -> { + assertThat(document.getMetadata()).isEqualTo(Map.of("lang", "", "category", "code_block")); + assertThat(document.getContent()).isEqualTo( + "Another possibility is to set block code without specific highlighting: ./mvnw spring-javaformat:apply\n"); + }); + } + + @Test + void testBlockquote() { + MarkdownDocumentReader reader = new MarkdownDocumentReader("classpath:/blockquote.md"); + + List documents = reader.get(); + + assertThat(documents).hasSize(2) + .extracting(Document::getMetadata, Document::getContent) + .containsOnly(tuple(Map.of(), + "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Curabitur diam eros, laoreet sit amet cursus vitae, varius sed nisi. Cras sit amet quam quis velit commodo porta consectetur id nisi. Phasellus tincidunt pulvinar augue."), + tuple(Map.of("category", "blockquote"), + "Proin vel laoreet leo, sed luctus augue. Sed et ligula commodo, commodo lacus at, consequat turpis. Maecenas eget sapien odio. Maecenas urna lectus, pellentesque in accumsan aliquam, congue eu libero. Ut rhoncus nec justo a porttitor. Pellentesque auctor pharetra eros, viverra sodales lorem aliquet id. Curabitur semper nisi vel sem interdum suscipit.")); + } + + @Test + void testBlockquoteWhenBlockquoteShouldNotBeSeparatedDocument() { + MarkdownDocumentReaderConfig config = MarkdownDocumentReaderConfig.builder() + .withIncludeBlockquote(true) + .build(); + + MarkdownDocumentReader reader = new MarkdownDocumentReader("classpath:/blockquote.md", config); + + List documents = reader.get(); + + assertThat(documents).hasSize(1); + + Document documentsFirst = documents.get(0); + assertThat(documentsFirst.getMetadata()).isEqualTo(Map.of("category", "blockquote")); + assertThat(documentsFirst.getContent()).isEqualTo( + "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Curabitur diam eros, laoreet sit amet cursus vitae, varius sed nisi. Cras sit amet quam quis velit commodo porta consectetur id nisi. Phasellus tincidunt pulvinar augue. Proin vel laoreet leo, sed luctus augue. Sed et ligula commodo, commodo lacus at, consequat turpis. Maecenas eget sapien odio. Maecenas urna lectus, pellentesque in accumsan aliquam, congue eu libero. Ut rhoncus nec justo a porttitor. Pellentesque auctor pharetra eros, viverra sodales lorem aliquet id. Curabitur semper nisi vel sem interdum suscipit."); + } + + @Test + void testLists() { + MarkdownDocumentReader reader = new MarkdownDocumentReader("classpath:/lists.md"); + + List documents = reader.get(); + + assertThat(documents).hasSize(2) + .extracting(Document::getMetadata, Document::getContent) + .containsOnly(tuple(Map.of("category", "header_2", "title", "Ordered list"), + "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Curabitur diam eros, laoreet sit amet cursus vitae, varius sed nisi. Cras sit amet quam quis velit commodo porta consectetur id nisi. Phasellus tincidunt pulvinar augue. Proin vel laoreet leo, sed luctus augue. Sed et ligula commodo, commodo lacus at, consequat turpis. Maecenas eget sapien odio. Pellentesque auctor pharetra eros, viverra sodales lorem aliquet id. Curabitur semper nisi vel sem interdum suscipit. Maecenas urna lectus, pellentesque in accumsan aliquam, congue eu libero. Ut rhoncus nec justo a porttitor."), + tuple(Map.of("category", "header_2", "title", "Unordered list"), + "Aenean eu leo eu nibh tristique posuere quis quis massa. Aenean imperdiet libero dui, nec malesuada dui maximus vel. Vestibulum sed dui condimentum, cursus libero in, dapibus tortor. Etiam facilisis enim in egestas dictum.")); + } + + @Test + void testWithAdditionalMetadata() { + MarkdownDocumentReaderConfig config = MarkdownDocumentReaderConfig.builder() + .withAdditionalMetadata("service", "some-service-name") + .withAdditionalMetadata("env", "prod") + .build(); + + MarkdownDocumentReader reader = new MarkdownDocumentReader("classpath:/simple.md", config); + + List documents = reader.get(); + + assertThat(documents).hasSize(1); + + Document documentsFirst = documents.get(0); + assertThat(documentsFirst.getMetadata()).isEqualTo(Map.of("service", "some-service-name", "env", "prod")); + assertThat(documentsFirst.getContent()).startsWith("Lorem ipsum dolor sit amet, consectetur adipiscing elit."); + } + +} diff --git a/document-readers/markdown-reader/src/test/resources/blockquote.md b/document-readers/markdown-reader/src/test/resources/blockquote.md new file mode 100644 index 00000000000..d92ac44f6cd --- /dev/null +++ b/document-readers/markdown-reader/src/test/resources/blockquote.md @@ -0,0 +1,8 @@ +Lorem ipsum dolor sit amet, consectetur adipiscing elit. Curabitur diam eros, laoreet sit amet cursus vitae, varius sed +nisi. Cras sit amet quam quis velit commodo porta consectetur id nisi. Phasellus tincidunt pulvinar augue. + +> Proin vel laoreet leo, sed luctus augue. Sed et ligula commodo, commodo lacus at, consequat turpis. Maecenas eget +> sapien odio. Maecenas urna lectus, pellentesque in accumsan aliquam, congue eu libero. Ut rhoncus nec justo a +> porttitor. Pellentesque auctor pharetra eros, viverra sodales lorem aliquet id. Curabitur semper nisi vel sem interdum +> suscipit. + diff --git a/document-readers/markdown-reader/src/test/resources/code.md b/document-readers/markdown-reader/src/test/resources/code.md new file mode 100644 index 00000000000..31d7c7b0319 --- /dev/null +++ b/document-readers/markdown-reader/src/test/resources/code.md @@ -0,0 +1,25 @@ +This is a Java sample application: + +```java +package com.example.demo; + +import org.springframework.boot.SpringApplication; +import org.springframework.boot.autoconfigure.SpringBootApplication; + +@SpringBootApplication +public class DemoApplication { + public static void main(String[] args) { + SpringApplication.run(DemoApplication.class, args); + } +} +``` + +Markdown also provides the possibility to `use inline code formatting throughout` the entire sentence. + +--- + +Another possibility is to set block code without specific highlighting: + +``` +./mvnw spring-javaformat:apply +``` diff --git a/document-readers/markdown-reader/src/test/resources/horizontal-rules.md b/document-readers/markdown-reader/src/test/resources/horizontal-rules.md new file mode 100644 index 00000000000..f7affefc124 --- /dev/null +++ b/document-readers/markdown-reader/src/test/resources/horizontal-rules.md @@ -0,0 +1,27 @@ +Lorem ipsum dolor sit amet, consectetur adipiscing elit. Donec tincidunt velit non bibendum gravida. + +--- + +Cras accumsan tincidunt ornare. Donec hendrerit consequat tellus blandit accumsan. Aenean aliquam metus at arcu +elementum dignissim. + +*** +Nullam nisi dui, egestas nec sem nec, interdum lobortis enim. Pellentesque odio orci, faucibus eu luctus nec, venenatis +et magna. + +* * * + +Vestibulum nec eros non felis fermentum posuere eget ac risus. Curabitur et fringilla massa. Cras facilisis nec nisl sit +amet sagittis. + +***** + +Aenean eu leo eu nibh tristique posuere quis quis massa. Nullam lacinia luctus sem ut vehicula. + +--------------------------------------- + +Aenean quis vulputate mi. Vestibulum ante ipsum primis in faucibus orci luctus et ultrices posuere cubilia curae; Nam tincidunt nunc a tortor tincidunt, nec lobortis diam rhoncus. + +- - - + +Nulla facilisi. Phasellus eget tellus sed nibh ornare interdum eu eu mi. diff --git a/document-readers/markdown-reader/src/test/resources/lists.md b/document-readers/markdown-reader/src/test/resources/lists.md new file mode 100644 index 00000000000..f82e7e34521 --- /dev/null +++ b/document-readers/markdown-reader/src/test/resources/lists.md @@ -0,0 +1,17 @@ +## Ordered list + +1. Lorem ipsum dolor sit *amet*, consectetur adipiscing elit. **Curabitur** diam eros, laoreet sit _amet_ cursus vitae, + varius sed nisi. +2. Cras sit amet quam quis velit commodo porta consectetur id nisi. Phasellus tincidunt pulvinar augue. +3. Proin vel laoreet leo, sed luctus augue. Sed et ligula commodo, commodo lacus at, consequat turpis. Maecenas eget + sapien odio. + 1. Pellentesque auctor pharetra eros, viverra sodales lorem aliquet id. Curabitur semper nisi vel sem interdum + suscipit. + 2. Maecenas urna lectus, pellentesque in accumsan aliquam, congue eu libero. Ut rhoncus nec justo a porttitor. + +## Unordered list + +* Aenean eu leo eu nibh tristique posuere quis quis massa. +* Aenean imperdiet libero dui, nec malesuada dui maximus vel. Vestibulum sed dui condimentum, cursus libero in, dapibus + tortor. + * Etiam facilisis enim in egestas dictum. diff --git a/document-readers/markdown-reader/src/test/resources/only-headers.md b/document-readers/markdown-reader/src/test/resources/only-headers.md new file mode 100644 index 00000000000..81c770e875a --- /dev/null +++ b/document-readers/markdown-reader/src/test/resources/only-headers.md @@ -0,0 +1,20 @@ +# Header 1a + +Lorem ipsum dolor sit amet, consectetur adipiscing elit. Curabitur diam eros, laoreet sit amet cursus vitae, varius sed +nisi. Cras sit amet quam quis velit commodo porta consectetur id nisi. Phasellus tincidunt pulvinar augue. + +# Header 1b + +Vestibulum ante ipsum primis in faucibus orci luctus et ultrices posuere cubilia curae; Etiam lobortis risus libero, sed +sollicitudin risus cursus in. Morbi enim metus, ornare vel lacinia eget, venenatis vel nibh. + +## Header 2b + +Proin vel laoreet leo, sed luctus augue. Sed et ligula commodo, commodo lacus at, consequat turpis. Maecenas eget sapien +odio. Maecenas urna lectus, pellentesque in accumsan aliquam, congue eu libero. + +# Header 1c + +## Header 2c + +Ut rhoncus nec justo a porttitor. Pellentesque auctor pharetra eros, viverra sodales lorem aliquet id. Curabitur semper nisi vel sem interdum suscipit. diff --git a/document-readers/markdown-reader/src/test/resources/simple.md b/document-readers/markdown-reader/src/test/resources/simple.md new file mode 100644 index 00000000000..3275c89b8fc --- /dev/null +++ b/document-readers/markdown-reader/src/test/resources/simple.md @@ -0,0 +1,8 @@ +Lorem ipsum dolor sit amet, consectetur adipiscing elit. Donec tincidunt velit non bibendum gravida. Cras accumsan +tincidunt ornare. Donec hendrerit consequat tellus blandit accumsan. Aenean aliquam metus at arcu elementum dignissim. + +Nullam nisi dui, egestas nec sem nec, interdum lobortis enim. Pellentesque odio orci, faucibus eu luctus nec, venenatis et magna. Vestibulum nec eros non felis fermentum posuere eget ac risus. + +Aenean eu leo eu nibh tristique posuere quis quis massa.\ +Nullam lacinia luctus sem ut vehicula. + diff --git a/document-readers/markdown-reader/src/test/resources/with-formatting.md b/document-readers/markdown-reader/src/test/resources/with-formatting.md new file mode 100644 index 00000000000..963743ece30 --- /dev/null +++ b/document-readers/markdown-reader/src/test/resources/with-formatting.md @@ -0,0 +1,9 @@ +# This is a fancy header name + +Lorem ipsum dolor sit amet, **consectetur adipiscing elit**. Donec tincidunt velit non bibendum gravida. Cras accumsan +tincidunt ornare. Donec hendrerit consequat tellus *blandit* accumsan. Aenean aliquam metus at ***arcu elementum*** +dignissim. + +### Header 3 + +Aenean eu leo eu nibh tristique _posuere quis quis massa_. diff --git a/pom.xml b/pom.xml index d7f9e6b4c60..874ee60e424 100644 --- a/pom.xml +++ b/pom.xml @@ -23,6 +23,7 @@ spring-ai-spring-boot-testcontainers spring-ai-spring-cloud-bindings + document-readers/markdown-reader document-readers/pdf-reader document-readers/tika-reader @@ -186,6 +187,7 @@ 1.9.1 0.5.0 2.10.1 + 0.22.0 5.3.1