Skip to content

Commit 3e87a5d

Browse files
authored
BAEL-9267: Determine If a File is a PDF File in Java (#18493)
* BAEL-9267: Determine If a File is a PDF File in Java
* BAEL-9267: Determine If a File is a PDF File in Java
1 parent 61b73ad commit 3e87a5d

File tree

2 files changed

+71
-1
lines changed

2 files changed

+71
-1
lines changed

text-processing-libraries-modules/pdf-2/pom.xml

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,11 @@
4040
<artifactId>poi-ooxml</artifactId>
4141
<version>${poi-ooxml.version}</version>
4242
</dependency>
43+
<dependency>
44+
<groupId>org.apache.tika</groupId>
45+
<artifactId>tika-core</artifactId>
46+
<version>${tika.version}</version>
47+
</dependency>
4348
<dependency>
4449
<groupId>org.apache.logging.log4j</groupId>
4550
<artifactId>log4j-api</artifactId>
@@ -70,8 +75,9 @@
7075
<itextpdf.version>5.5.13.3</itextpdf.version>
7176
<itextpdf.core.version>7.2.3</itextpdf.core.version>
7277
<itextpdf.cleanup.version>3.0.1</itextpdf.cleanup.version>
73-
<pdfbox.version>3.0.0</pdfbox.version>
78+
<pdfbox.version>3.0.4</pdfbox.version>
7479
<poi-ooxml.version>5.2.5</poi-ooxml.version>
80+
<tika.version>3.1.0</tika.version>
7581
<log4j-api.version>2.20.0</log4j-api.version>
7682
<log4j-core.version>2.20.0</log4j-core.version>
7783
</properties>
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
package com.baeldung.detect;
2+
3+
import org.apache.pdfbox.Loader;
4+
import org.apache.pdfbox.pdmodel.PDDocument;
5+
import org.apache.tika.Tika;
6+
import org.junit.jupiter.api.Test;
7+
8+
import static org.junit.jupiter.api.Assertions.assertTrue;
9+
10+
import java.io.*;
11+
import java.util.Objects;
12+
13+
import com.itextpdf.commons.exceptions.ITextException;
14+
import com.itextpdf.kernel.pdf.PdfDocument;
15+
import com.itextpdf.kernel.pdf.PdfReader;
16+
17+
18+
public class PdfDetectUnitTest {
19+
20+
private static final File PDF_FILE = new File("src/test/resources/input.pdf");
21+
22+
@Test
23+
void whenDetectPdfByPdfBox_thenCorrect() {
24+
boolean isPdf;
25+
try (PDDocument document = Loader.loadPDF(PDF_FILE)) {
26+
isPdf = true;
27+
} catch (IOException ioe) {
28+
isPdf = false;
29+
}
30+
assertTrue(isPdf);
31+
}
32+
33+
@Test
34+
void whenDetectPdfByItext_thenCorrect() {
35+
boolean isPdf;
36+
try (PdfDocument pdfDoc = new PdfDocument(new PdfReader(PDF_FILE))) {
37+
isPdf = true;
38+
} catch (ITextException | IOException e) {
39+
isPdf = false;
40+
}
41+
assertTrue(isPdf);
42+
}
43+
44+
@Test
45+
void whenDetectPdfByFileSignature_thenCorrect() throws IOException {
46+
boolean isPdf = false;
47+
try (InputStream fis = new BufferedInputStream(new FileInputStream(PDF_FILE))) {
48+
byte[] bytes = new byte[5];
49+
if (fis.read(bytes) == 5) {
50+
String header = new String(bytes);
51+
isPdf = Objects.equals(header, "%PDF-");
52+
}
53+
}
54+
assertTrue(isPdf);
55+
}
56+
57+
@Test
58+
void whenDetectPdfByTika_thenCorrect() throws IOException {
59+
Tika tika = new Tika();
60+
boolean isPdf = Objects.equals(tika.detect(PDF_FILE), "application/pdf");
61+
assertTrue(isPdf);
62+
}
63+
64+
}

0 commit comments

Comments
 (0)