Skip to content

Commit fbcd844

Browse files
committed
Fix PDF document grouping logic to ensure correct page distribution according to pagesPerDocument
Signed-off-by: stroller <[email protected]>
1 parent 3f79373 commit fbcd844

File tree

2 files changed

+15
-1
lines changed

2 files changed

+15
-1
lines changed

document-readers/pdf-reader/src/main/java/org/springframework/ai/reader/pdf/PagePdfDocumentReader.java

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -44,6 +44,7 @@
4444
* pageBottomMargin = 0
4545
*
4646
* @author Christian Tzolov
47+
* @author Fu Jian
4748
*/
4849
public class PagePdfDocumentReader implements DocumentReader {
4950

@@ -119,7 +120,7 @@ public List<Document> get() {
119120
pagesPerDocument++;
120121

121122
if (this.config.pagesPerDocument != PdfDocumentReaderConfig.ALL_PAGES
122-
&& pagesPerDocument >= this.config.pagesPerDocument) {
123+
&& pagesPerDocument > this.config.pagesPerDocument) {
123124
pagesPerDocument = 0;
124125

125126
var aggregatedPageTextGroup = pageTextGroupList.stream().collect(Collectors.joining());

document-readers/pdf-reader/src/test/java/org/springframework/ai/reader/pdf/PagePdfDocumentReaderTests.java

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -30,6 +30,7 @@
3030
/**
3131
* @author Christian Tzolov
3232
* @author Tibor Tarnai
33+
* @author Fu Jian
3334
*/
3435
class PagePdfDocumentReaderTests {
3536

@@ -71,4 +72,16 @@ void testIndexOutOfBound() {
7172
assertThat(documents).hasSize(64);
7273
}
7374

75+
@Test
76+
void testPagesPerDocument() {
77+
var documents = new PagePdfDocumentReader("classpath:/sample2.pdf",
78+
PdfDocumentReaderConfig.builder()
79+
.withPageExtractedTextFormatter(ExtractedTextFormatter.builder().build())
80+
.withPagesPerDocument(32)
81+
.build())
82+
.get();
83+
84+
assertThat(documents).hasSize(2);
85+
}
86+
7487
}

0 commit comments

Comments
 (0)