Skip to content

Commit 866fef5

Browse files
committed
dspace-api: Update syntax for pdfbox 3.0.x
1 parent 2de5b58 commit 866fef5

File tree

7 files changed

+29
-18
lines changed

7 files changed

+29
-18
lines changed

dspace-api/src/main/java/org/dspace/app/mediafilter/ImageMagickThumbnailFilter.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
import java.util.regex.Pattern;
1515
import java.util.regex.PatternSyntaxException;
1616

17-
import org.apache.pdfbox.pdmodel.PDDocument;
17+
import org.apache.pdfbox.Loader;
1818
import org.apache.pdfbox.pdmodel.PDPage;
1919
import org.apache.pdfbox.pdmodel.common.PDRectangle;
2020
import org.dspace.content.Bitstream;
@@ -153,8 +153,8 @@ public File getImageFile(File f, boolean verbose)
153153
// the CropBox is missing or empty because pdfbox will set it to the
154154
// same size as the MediaBox if it doesn't exist. Also note that we
155155
// only need to check the first page, since that's what we use for
156-
// generating the thumbnail (PDDocument uses a zero-based index).
157-
PDPage pdfPage = PDDocument.load(f).getPage(0);
156+
// generating the thumbnail (PDDocument.getPage uses a zero-based index).
157+
PDPage pdfPage = Loader.loadPDF(f).getPage(0);
158158
PDRectangle pdfPageMediaBox = pdfPage.getMediaBox();
159159
PDRectangle pdfPageCropBox = pdfPage.getCropBox();
160160

dspace-api/src/main/java/org/dspace/app/mediafilter/PDFBoxThumbnail.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111
import java.io.InputStream;
1212

1313
import org.apache.logging.log4j.Logger;
14+
import org.apache.pdfbox.Loader;
15+
import org.apache.pdfbox.io.RandomAccessReadBuffer;
1416
import org.apache.pdfbox.pdmodel.PDDocument;
1517
import org.apache.pdfbox.pdmodel.encryption.InvalidPasswordException;
1618
import org.apache.pdfbox.rendering.PDFRenderer;
@@ -71,7 +73,7 @@ public InputStream getDestinationStream(Item currentItem, InputStream source, bo
7173
BufferedImage buf;
7274

7375
// Render the page image.
74-
try ( PDDocument doc = PDDocument.load(source); ) {
76+
try ( PDDocument doc = Loader.loadPDF(new RandomAccessReadBuffer(source)); ) {
7577
PDFRenderer renderer = new PDFRenderer(doc);
7678
buf = renderer.renderImage(0);
7779
} catch (InvalidPasswordException ex) {

dspace-api/src/main/java/org/dspace/content/packager/PDFPackager.java

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,11 @@
1919

2020
import org.apache.commons.lang3.ArrayUtils;
2121
import org.apache.logging.log4j.Logger;
22+
import org.apache.pdfbox.Loader;
2223
import org.apache.pdfbox.cos.COSDocument;
2324
import org.apache.pdfbox.io.MemoryUsageSetting;
24-
import org.apache.pdfbox.io.RandomAccessBufferedFileInputStream;
25+
import org.apache.pdfbox.io.RandomAccessReadBuffer;
2526
import org.apache.pdfbox.io.ScratchFile;
26-
import org.apache.pdfbox.pdfparser.PDFParser;
2727
import org.apache.pdfbox.pdmodel.PDDocument;
2828
import org.apache.pdfbox.pdmodel.PDDocumentInformation;
2929
import org.dspace.authorize.AuthorizeException;
@@ -331,19 +331,23 @@ private void crosswalkPDF(Context context, Item item, InputStream metadata)
331331
COSDocument cos = null;
332332

333333
try {
334-
ScratchFile scratchFile = null;
334+
PDDocument document = null;
335+
335336
try {
336337
long useRAM = Runtime.getRuntime().freeMemory() * 80 / 100; // use up to 80% of JVM free memory
337-
scratchFile = new ScratchFile(
338-
MemoryUsageSetting.setupMixed(useRAM)); // then fallback to temp file (unlimited size)
338+
document = Loader.loadPDF(
339+
new RandomAccessReadBuffer(metadata),
340+
() -> new ScratchFile(MemoryUsageSetting.setupMixed(useRAM))); // then fallback to temp file (unlimited size)
339341
} catch (IOException ioe) {
340342
log.warn("Error initializing scratch file: " + ioe.getMessage());
341343
}
342344

343-
PDFParser parser = new PDFParser(new RandomAccessBufferedFileInputStream(metadata), scratchFile);
344-
parser.parse();
345-
cos = parser.getDocument();
345+
// sanity check: loaded PDF document must not be null.
346+
if(document == null) {
347+
throw new MetadataValidationException("The provided stream could not be parsed into a PDF document.");
348+
}
346349

350+
cos = document.getDocument();
347351
// sanity check: PDFBox breaks on encrypted documents, so give up.
348352
if (cos.getEncryptionDictionary() != null) {
349353
throw new MetadataValidationException("This packager cannot accept an encrypted PDF document.");

dspace-api/src/main/java/org/dspace/disseminate/CitationDocumentServiceImpl.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@
2020
import org.apache.commons.lang3.tuple.Pair;
2121
import org.apache.logging.log4j.LogManager;
2222
import org.apache.logging.log4j.Logger;
23+
import org.apache.pdfbox.Loader;
24+
import org.apache.pdfbox.io.RandomAccessReadBuffer;
2325
import org.apache.pdfbox.pdmodel.PDDocument;
2426
import org.apache.pdfbox.pdmodel.PDPage;
2527
import org.dspace.authorize.AuthorizeException;
@@ -264,7 +266,7 @@ public Pair<byte[], Long> makeCitedDocument(Context context, Bitstream bitstream
264266

265267
private PDDocument loadDocumentFromDB(Context context, Bitstream bitstream) {
266268
try (var inputStream = bitstreamService.retrieve(context, bitstream)) {
267-
return PDDocument.load(inputStream);
269+
return Loader.loadPDF(new RandomAccessReadBuffer(inputStream));
268270
} catch (IOException | SQLException | AuthorizeException e) {
269271
throw new RuntimeException(e);
270272
}
@@ -307,4 +309,4 @@ private void addCoverPageToDocument(PDDocument document, PDDocument sourceDocume
307309
}
308310
}
309311
}
310-
}
312+
}

dspace-api/src/main/java/org/dspace/disseminate/PdfGenerator.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
import java.util.Map;
1616
import java.util.stream.Collectors;
1717

18+
import org.apache.pdfbox.Loader;
1819
import org.apache.pdfbox.pdmodel.PDDocument;
1920
import org.thymeleaf.TemplateEngine;
2021
import org.thymeleaf.context.Context;
@@ -84,7 +85,7 @@ public void generateToFile(String html, File toFile) {
8485
public PDDocument generate(String html) {
8586
try (var out = new ByteArrayOutputStream()) {
8687
generate(html, out);
87-
return PDDocument.load(out.toByteArray());
88+
return Loader.loadPDF(out.toByteArray());
8889
} catch (IOException e) {
8990
throw new RuntimeException(e);
9091
}

dspace-api/src/main/java/org/dspace/importer/external/epo/service/EpoImportMetadataSourceServiceImpl.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,9 @@
2929
import org.apache.commons.lang3.StringUtils;
3030
import org.apache.http.HttpException;
3131
import org.apache.http.client.utils.URIBuilder;
32+
import org.apache.jena.ext.xerces.impl.dv.util.Base64;
3233
import org.apache.logging.log4j.LogManager;
3334
import org.apache.logging.log4j.Logger;
34-
import org.apache.xerces.impl.dv.util.Base64;
3535
import org.dspace.app.util.XMLUtils;
3636
import org.dspace.content.Item;
3737
import org.dspace.importer.external.datamodel.ImportRecord;

dspace-server-webapp/src/test/java/org/dspace/app/rest/BitstreamRestControllerIT.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,8 @@
6262
import org.apache.commons.io.IOUtils;
6363
import org.apache.commons.lang3.CharEncoding;
6464
import org.apache.commons.lang3.StringUtils;
65+
import org.apache.pdfbox.Loader;
66+
import org.apache.pdfbox.io.RandomAccessReadBuffer;
6567
import org.apache.pdfbox.pdmodel.PDDocument;
6668
import org.apache.pdfbox.text.PDFTextStripper;
6769
import org.apache.solr.client.solrj.SolrServerException;
@@ -1007,7 +1009,7 @@ private String extractPDFText(byte[] content) throws IOException {
10071009

10081010
try (ByteArrayInputStream source = new ByteArrayInputStream(content);
10091011
Writer writer = new StringWriter();
1010-
PDDocument pdfDoc = PDDocument.load(source)) {
1012+
PDDocument pdfDoc = Loader.loadPDF(new RandomAccessReadBuffer(source))) {
10111013

10121014
pts.writeText(pdfDoc, writer);
10131015
return writer.toString();
@@ -1016,7 +1018,7 @@ private String extractPDFText(byte[] content) throws IOException {
10161018

10171019
private int getNumberOfPdfPages(byte[] content) throws IOException {
10181020
try (ByteArrayInputStream source = new ByteArrayInputStream(content);
1019-
PDDocument pdfDoc = PDDocument.load(source)) {
1021+
PDDocument pdfDoc = Loader.loadPDF(new RandomAccessReadBuffer(source))) {
10201022
return pdfDoc.getNumberOfPages();
10211023
}
10221024
}

0 commit comments

Comments
 (0)