Skip to content

Commit b396855

Browse files
committed
Sample pdf showing bug in TextLine
1 parent 6982100 commit b396855

File tree

2 files changed

+22
-13
lines changed

2 files changed

+22
-13
lines changed

document-readers/pdf-reader/src/test/java/org/springframework/ai/reader/pdf/PagePdfDocumentReaderTests.java

Lines changed: 22 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -16,15 +16,14 @@
1616

1717
package org.springframework.ai.reader.pdf;
1818

19-
import java.util.List;
20-
import java.util.stream.Collectors;
21-
2219
import org.junit.jupiter.api.Test;
23-
2420
import org.springframework.ai.document.Document;
2521
import org.springframework.ai.reader.ExtractedTextFormatter;
2622
import org.springframework.ai.reader.pdf.config.PdfDocumentReaderConfig;
2723

24+
import java.util.List;
25+
import java.util.stream.Collectors;
26+
2827
import static org.assertj.core.api.Assertions.assertThat;
2928

3029
/**
@@ -37,15 +36,15 @@ public void classpathRead() {
3736

3837
PagePdfDocumentReader pdfReader = new PagePdfDocumentReader("classpath:/sample1.pdf",
3938
PdfDocumentReaderConfig.builder()
40-
.withPageTopMargin(0)
41-
.withPageBottomMargin(0)
42-
.withPageExtractedTextFormatter(ExtractedTextFormatter.builder()
43-
.withNumberOfTopTextLinesToDelete(0)
44-
.withNumberOfBottomTextLinesToDelete(3)
45-
.withNumberOfTopPagesToSkipBeforeDelete(0)
46-
.build())
47-
.withPagesPerDocument(1)
48-
.build());
39+
.withPageTopMargin(0)
40+
.withPageBottomMargin(0)
41+
.withPageExtractedTextFormatter(ExtractedTextFormatter.builder()
42+
.withNumberOfTopTextLinesToDelete(0)
43+
.withNumberOfBottomTextLinesToDelete(3)
44+
.withNumberOfTopPagesToSkipBeforeDelete(0)
45+
.build())
46+
.withPagesPerDocument(1)
47+
.build());
4948

5049
List<Document> docs = pdfReader.get();
5150

@@ -57,4 +56,14 @@ public void classpathRead() {
5756
List.of("Page 1 of 4", "Page 2 of 4", "Page 3 of 4", "Page 4 of 4", "PDF Bookmark Sample"));
5857
}
5958

59+
/**
 * Builds a {@code PagePdfDocumentReader} for {@code classpath:/sample2.pdf}, configured with
 * an {@code ExtractedTextFormatter} built with no options set and
 * {@code withPagesPerDocument(1)}, then calls {@code get()} on it.
 *
 * The test contains no assertions and discards the result of {@code get()}, so it passes
 * whenever that call returns without throwing.
 *
 * NOTE(review): judging by the method name and the commit title ("Sample pdf showing bug in
 * TextLine"), this presumably reproduces an index-out-of-bounds failure while extracting
 * text from sample2.pdf - confirm against the corresponding issue.
 */
@Test
60+
void testIndexOutOfBound() {
61+
new PagePdfDocumentReader("classpath:/sample2.pdf",
62+
PdfDocumentReaderConfig.builder()
63+
.withPageExtractedTextFormatter(ExtractedTextFormatter.builder()
64+
.build())
65+
.withPagesPerDocument(1)
66+
.build()).get();
67+
}
68+
6069
}
3.93 MB
Binary file not shown.

0 commit comments

Comments
 (0)