Skip to content

Commit 51aa03b

Browse files
authored
feat(cbz-to-pdf,pdf-to-cbz): Converter for CBZ format to and from PDF (Stirling-Tools#4472)
1 parent 413cd0c commit 51aa03b

File tree

12 files changed

+737
-2
lines changed

12 files changed

+737
-2
lines changed
Lines changed: 201 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,201 @@
1+
package stirling.software.common.util;
2+
3+
import java.io.BufferedInputStream;
4+
import java.io.ByteArrayOutputStream;
5+
import java.io.IOException;
6+
import java.io.InputStream;
7+
import java.util.ArrayList;
8+
import java.util.Comparator;
9+
import java.util.Enumeration;
10+
import java.util.List;
11+
import java.util.regex.Pattern;
12+
import java.util.zip.ZipEntry;
13+
import java.util.zip.ZipFile;
14+
import java.util.zip.ZipInputStream;
15+
16+
import org.apache.commons.io.FilenameUtils;
17+
import org.apache.pdfbox.pdmodel.PDDocument;
18+
import org.apache.pdfbox.pdmodel.PDPage;
19+
import org.apache.pdfbox.pdmodel.PDPageContentStream;
20+
import org.apache.pdfbox.pdmodel.common.PDRectangle;
21+
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
22+
import org.springframework.web.multipart.MultipartFile;
23+
24+
import lombok.experimental.UtilityClass;
25+
import lombok.extern.slf4j.Slf4j;
26+
27+
import stirling.software.common.service.CustomPDFDocumentFactory;
28+
29+
@Slf4j
30+
@UtilityClass
31+
public class CbzUtils {
32+
33+
/**
 * Case-insensitive pattern matching names that end in one of the recognised image extensions:
 * jpg, jpeg, png, gif, bmp or webp.
 */
private final Pattern IMAGE_PATTERN =
34+
Pattern.compile(".*\\.(jpg|jpeg|png|gif|bmp|webp)$", Pattern.CASE_INSENSITIVE);
35+
36+
public byte[] convertCbzToPdf(
37+
MultipartFile cbzFile,
38+
CustomPDFDocumentFactory pdfDocumentFactory,
39+
TempFileManager tempFileManager)
40+
throws IOException {
41+
42+
validateCbzFile(cbzFile);
43+
44+
try (TempFile tempFile = new TempFile(tempFileManager, ".cbz")) {
45+
cbzFile.transferTo(tempFile.getFile());
46+
47+
// Early ZIP validity check using ZipInputStream (fail fast on non-zip content)
48+
try (BufferedInputStream bis =
49+
new BufferedInputStream(
50+
new java.io.FileInputStream(tempFile.getFile()));
51+
ZipInputStream zis = new ZipInputStream(bis)) {
52+
if (zis.getNextEntry() == null) {
53+
throw new IllegalArgumentException("Archive is empty or invalid ZIP");
54+
}
55+
} catch (IOException e) {
56+
throw new IllegalArgumentException("Invalid CBZ/ZIP archive", e);
57+
}
58+
59+
try (PDDocument document = pdfDocumentFactory.createNewDocument();
60+
ZipFile zipFile = new ZipFile(tempFile.getFile())) {
61+
Enumeration<? extends ZipEntry> entries = zipFile.entries();
62+
List<ImageEntryData> imageEntries = new ArrayList<>();
63+
while (entries.hasMoreElements()) {
64+
ZipEntry entry = entries.nextElement();
65+
if (!entry.isDirectory() && isImageFile(entry.getName())) {
66+
try (InputStream is = zipFile.getInputStream(entry)) {
67+
ByteArrayOutputStream baos = new ByteArrayOutputStream();
68+
is.transferTo(baos);
69+
imageEntries.add(
70+
new ImageEntryData(entry.getName(), baos.toByteArray()));
71+
} catch (IOException e) {
72+
log.warn("Error reading image {}: {}", entry.getName(), e.getMessage());
73+
}
74+
}
75+
}
76+
77+
imageEntries.sort(
78+
Comparator.comparing(ImageEntryData::name, new NaturalOrderComparator()));
79+
80+
if (imageEntries.isEmpty()) {
81+
throw new IllegalArgumentException("No valid images found in the CBZ file");
82+
}
83+
84+
for (ImageEntryData imageEntry : imageEntries) {
85+
try {
86+
PDImageXObject pdImage =
87+
PDImageXObject.createFromByteArray(
88+
document, imageEntry.data(), imageEntry.name());
89+
PDPage page =
90+
new PDPage(
91+
new PDRectangle(pdImage.getWidth(), pdImage.getHeight()));
92+
document.addPage(page);
93+
try (PDPageContentStream contentStream =
94+
new PDPageContentStream(document, page)) {
95+
contentStream.drawImage(pdImage, 0, 0);
96+
}
97+
} catch (IOException e) {
98+
log.warn(
99+
"Error processing image {}: {}", imageEntry.name(), e.getMessage());
100+
}
101+
}
102+
103+
if (document.getNumberOfPages() == 0) {
104+
throw new IllegalArgumentException(
105+
"No images could be processed from the CBZ file");
106+
}
107+
ByteArrayOutputStream baos = new ByteArrayOutputStream();
108+
document.save(baos);
109+
return baos.toByteArray();
110+
}
111+
}
112+
}
113+
114+
private void validateCbzFile(MultipartFile file) {
115+
if (file == null || file.isEmpty()) {
116+
throw new IllegalArgumentException("File cannot be null or empty");
117+
}
118+
119+
String filename = file.getOriginalFilename();
120+
if (filename == null) {
121+
throw new IllegalArgumentException("File must have a name");
122+
}
123+
124+
String extension = FilenameUtils.getExtension(filename).toLowerCase();
125+
if (!"cbz".equals(extension) && !"zip".equals(extension)) {
126+
throw new IllegalArgumentException("File must be a CBZ or ZIP archive");
127+
}
128+
}
129+
130+
public boolean isCbzFile(MultipartFile file) {
131+
String filename = file.getOriginalFilename();
132+
if (filename == null) {
133+
return false;
134+
}
135+
136+
String extension = FilenameUtils.getExtension(filename).toLowerCase();
137+
return "cbz".equals(extension) || "zip".equals(extension);
138+
}
139+
140+
private boolean isImageFile(String filename) {
141+
return IMAGE_PATTERN.matcher(filename).matches();
142+
}
143+
144+
/** One image taken from the archive: the ZIP entry name and the entry's raw bytes. */
private record ImageEntryData(String name, byte[] data) {}
145+
146+
/**
 * Orders strings "naturally": runs of ASCII digits are compared by numeric value and all other
 * text lexicographically, so {@code page2.jpg} sorts before {@code page10.jpg}.
 *
 * <p>Digit runs are compared without being parsed, so arbitrarily long numbers (for example
 * timestamps in file names) cannot overflow. When one string runs out first, the string with
 * text left over sorts last. Strings that are equal apart from leading zeros are ordered by
 * length and then lexicographically, so only identical strings compare as equal.
 *
 * <p>Package-private rather than private so the ordering can be unit-tested directly.
 */
final class NaturalOrderComparator implements Comparator<String> {

    /**
     * Compares two strings chunk by chunk, where a chunk is a maximal run of digits or of
     * non-digits.
     *
     * @param s1 the first string
     * @param s2 the second string
     * @return a negative, zero, or positive value as {@code s1} sorts before, equal to, or
     *     after {@code s2}
     */
    @Override
    public int compare(String s1, String s2) {
        int len1 = s1.length();
        int len2 = s2.length();
        int pos1 = 0;
        int pos2 = 0;

        while (pos1 < len1 && pos2 < len2) {
            String chunk1 = getChunk(s1, len1, pos1);
            pos1 += chunk1.length();

            String chunk2 = getChunk(s2, len2, pos2);
            pos2 += chunk2.length();

            int result;
            if (isDigit(chunk1.charAt(0)) && isDigit(chunk2.charAt(0))) {
                result = compareDigitRuns(chunk1, chunk2);
            } else {
                result = chunk1.compareTo(chunk2);
            }

            if (result != 0) {
                return result;
            }
        }

        // At least one string is exhausted. Compare what is left rather than the total
        // lengths: leading zeros can make the totals equal even though one string still has
        // unread text (e.g. "a01" vs "a1b").
        int leftover = Integer.compare(len1 - pos1, len2 - pos2);
        if (leftover != 0) {
            return leftover;
        }

        // Same chunks up to leading zeros: shorter first, then plain string order so that
        // distinct names never compare as equal.
        int byLength = Integer.compare(len1, len2);
        return byLength != 0 ? byLength : s1.compareTo(s2);
    }

    /** Compares two non-empty digit runs by numeric value without parsing them. */
    private static int compareDigitRuns(String digits1, String digits2) {
        int start1 = firstSignificantDigit(digits1);
        int start2 = firstSignificantDigit(digits2);
        int significant1 = digits1.length() - start1;
        int significant2 = digits2.length() - start2;

        // Without leading zeros, more digits means a larger number.
        if (significant1 != significant2) {
            return Integer.compare(significant1, significant2);
        }
        // Same digit count: lexicographic order equals numeric order.
        return digits1.substring(start1).compareTo(digits2.substring(start2));
    }

    /** Returns the index of the first non-zero digit, or of the last digit if all are zero. */
    private static int firstSignificantDigit(String digits) {
        int index = 0;
        int lastIndex = digits.length() - 1;
        while (index < lastIndex && digits.charAt(index) == '0') {
            index++;
        }
        return index;
    }

    /**
     * Returns the maximal run of digits, or of non-digits, that starts at {@code marker}.
     *
     * @param s the string to read from
     * @param length the length of {@code s}
     * @param marker the start index; must be less than {@code length}
     */
    private static String getChunk(String s, int length, int marker) {
        boolean digitRun = isDigit(s.charAt(marker));
        int end = marker + 1;
        while (end < length && isDigit(s.charAt(end)) == digitRun) {
            end++;
        }
        return s.substring(marker, end);
    }

    /** ASCII digits only; other Unicode digits are treated as ordinary text. */
    private static boolean isDigit(char ch) {
        return ch >= '0' && ch <= '9';
    }
}
201+
}
Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
package stirling.software.common.util;
2+
3+
import java.awt.image.BufferedImage;
4+
import java.io.ByteArrayOutputStream;
5+
import java.io.IOException;
6+
import java.util.zip.ZipEntry;
7+
import java.util.zip.ZipOutputStream;
8+
9+
import javax.imageio.ImageIO;
10+
11+
import org.apache.commons.io.FilenameUtils;
12+
import org.apache.pdfbox.pdmodel.PDDocument;
13+
import org.apache.pdfbox.rendering.ImageType;
14+
import org.apache.pdfbox.rendering.PDFRenderer;
15+
import org.springframework.web.multipart.MultipartFile;
16+
17+
import lombok.extern.slf4j.Slf4j;
18+
19+
import stirling.software.common.service.CustomPDFDocumentFactory;
20+
21+
@Slf4j
22+
public class PdfToCbzUtils {
23+
24+
public static byte[] convertPdfToCbz(
25+
MultipartFile pdfFile, int dpi, CustomPDFDocumentFactory pdfDocumentFactory)
26+
throws IOException {
27+
28+
validatePdfFile(pdfFile);
29+
30+
try (PDDocument document = pdfDocumentFactory.load(pdfFile)) {
31+
if (document.getNumberOfPages() == 0) {
32+
throw new IllegalArgumentException("PDF file contains no pages");
33+
}
34+
35+
return createCbzFromPdf(document, dpi);
36+
}
37+
}
38+
39+
private static void validatePdfFile(MultipartFile file) {
40+
if (file == null || file.isEmpty()) {
41+
throw new IllegalArgumentException("File cannot be null or empty");
42+
}
43+
44+
String filename = file.getOriginalFilename();
45+
if (filename == null) {
46+
throw new IllegalArgumentException("File must have a name");
47+
}
48+
49+
String extension = FilenameUtils.getExtension(filename).toLowerCase();
50+
if (!"pdf".equals(extension)) {
51+
throw new IllegalArgumentException("File must be a PDF");
52+
}
53+
}
54+
55+
private static byte[] createCbzFromPdf(PDDocument document, int dpi) throws IOException {
56+
PDFRenderer pdfRenderer = new PDFRenderer(document);
57+
58+
try (ByteArrayOutputStream cbzOutputStream = new ByteArrayOutputStream();
59+
ZipOutputStream zipOut = new ZipOutputStream(cbzOutputStream)) {
60+
61+
int totalPages = document.getNumberOfPages();
62+
63+
for (int pageIndex = 0; pageIndex < totalPages; pageIndex++) {
64+
try {
65+
BufferedImage image =
66+
pdfRenderer.renderImageWithDPI(pageIndex, dpi, ImageType.RGB);
67+
68+
String imageFilename = String.format("page_%03d.png", pageIndex + 1);
69+
70+
ZipEntry zipEntry = new ZipEntry(imageFilename);
71+
zipOut.putNextEntry(zipEntry);
72+
73+
ImageIO.write(image, "PNG", zipOut);
74+
zipOut.closeEntry();
75+
76+
} catch (IOException e) {
77+
log.warn("Error processing page {}: {}", pageIndex + 1, e.getMessage());
78+
} catch (OutOfMemoryError e) {
79+
throw ExceptionUtils.createOutOfMemoryDpiException(pageIndex + 1, dpi, e);
80+
} catch (NegativeArraySizeException e) {
81+
throw ExceptionUtils.createOutOfMemoryDpiException(pageIndex + 1, dpi, e);
82+
}
83+
}
84+
85+
zipOut.finish();
86+
return cbzOutputStream.toByteArray();
87+
}
88+
}
89+
90+
public static boolean isPdfFile(MultipartFile file) {
91+
String filename = file.getOriginalFilename();
92+
if (filename == null) {
93+
return false;
94+
}
95+
96+
String extension = FilenameUtils.getExtension(filename).toLowerCase();
97+
return "pdf".equals(extension);
98+
}
99+
}

0 commit comments

Comments
 (0)