Skip to content

Commit 50b38dc

Browse files
Added scanned PDF test resource for OCR integration (#15428)
1 parent be3ca26 commit 50b38dc

File tree

2 files changed

+23
-0
lines changed

2 files changed

+23
-0
lines changed

jablib/src/test/java/org/jabref/logic/search/indexing/DocumentReaderTest.java

Lines changed: 23 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,12 +1,20 @@
11
package org.jabref.logic.search.indexing;
22

3+
import java.nio.file.Path;
4+
import java.util.List;
35
import java.util.stream.Stream;
46

7+
import org.jabref.model.search.LinkedFilesConstants;
8+
9+
import org.apache.lucene.document.Document;
10+
import org.junit.jupiter.api.Test;
511
import org.junit.jupiter.params.ParameterizedTest;
612
import org.junit.jupiter.params.provider.Arguments;
713
import org.junit.jupiter.params.provider.MethodSource;
814

915
import static org.junit.jupiter.api.Assertions.assertEquals;
16+
import static org.junit.jupiter.api.Assertions.assertFalse;
17+
import static org.junit.jupiter.api.Assertions.assertNull;
1018

1119
public class DocumentReaderTest {
1220

@@ -25,4 +33,19 @@ public void mergeLinesTest(String expected, String linesToMerge) {
2533
String result = DocumentReader.mergeLines(linesToMerge);
2634
assertEquals(expected, result);
2735
}
36+
37+
/**
 * Reads the fixture {@code scanned-image-only.pdf} through
 * {@link DocumentReader#readPdfContents} and checks that at least one
 * {@link Document} is returned and that none of the returned documents
 * carries a field named {@code LinkedFilesConstants.CONTENT}.
 * <p>
 * NOTE(review): the test and fixture names suggest the PDF is an image-only
 * scan without a text layer; the fixture itself is a binary file and is not
 * inspected here - confirm.
 */
@Test
38+
void scannedPdfHasNoExtractableContent() {
39+
DocumentReader reader = new DocumentReader();
40+
List<Document> pages = reader.readPdfContents(
41+
"scanned-image-only.pdf",
42+
// Relative path: resolved against the working directory of the test run.
Path.of("src/test/resources/pdfs/scanned-image-only.pdf")
43+
);
44+
45+
// Guard against a vacuous pass: with an empty list the loop below would assert nothing.
assertFalse(pages.isEmpty());
46+
47+
for (Document page : pages) {
48+
// getField returning null means the document has no field under the CONTENT name.
assertNull(page.getField(LinkedFilesConstants.CONTENT.toString()));
49+
}
50+
}
2851
}
5.78 KB
Binary file not shown.

0 commit comments

Comments
 (0)