Skip to content

Commit 233d93b

Browse files
committed
[RELEASE] iText pdfOCR 4.1.2
2 parents ff532a1 + de9afc6 commit 233d93b

File tree

9 files changed

+34
-70
lines changed

9 files changed

+34
-70
lines changed

pdfocr-api/pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
<parent>
66
<groupId>com.itextpdf</groupId>
77
<artifactId>pdfocr-root</artifactId>
8-
<version>4.1.1</version>
8+
<version>4.1.2</version>
99
</parent>
1010

1111
<properties>

pdfocr-onnxtr/pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
<parent>
66
<groupId>com.itextpdf</groupId>
77
<artifactId>pdfocr-root</artifactId>
8-
<version>4.1.1</version>
8+
<version>4.1.2</version>
99
</parent>
1010

1111
<artifactId>pdfocr-onnxtr</artifactId>

pdfocr-onnxtr/src/main/java/com/itextpdf/pdfocr/onnxtr/actions/data/PdfOcrOnnxTrProductData.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ This file is part of the iText (R) project.
3030
public final class PdfOcrOnnxTrProductData {
3131
private static final String PDF_OCR_ONNXTR_PRODUCT_NAME = "pdfOcr-onnxtr";
3232
private static final String PDF_OCR_ONNXTR_PUBLIC_PRODUCT_NAME = "pdfOCR-OnnxTR";
33-
private static final String PDF_OCR_VERSION = "4.1.1";
33+
private static final String PDF_OCR_VERSION = "4.1.2";
3434
private static final int PDF_OCR_COPYRIGHT_SINCE = 2000;
3535
private static final int PDF_OCR_COPYRIGHT_TO = 2025;
3636

pdfocr-onnxtr/src/test/java/com/itextpdf/pdfocr/onnxtr/OnnxTRCmykIntegrationTest.java

Lines changed: 23 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,10 @@ This file is part of the iText (R) project.
2929
import com.itextpdf.kernel.pdf.PdfWriter;
3030
import com.itextpdf.pdfocr.OcrPdfCreator;
3131
import com.itextpdf.pdfocr.OcrPdfCreatorProperties;
32+
import com.itextpdf.pdfocr.exceptions.PdfOcrInputException;
3233
import com.itextpdf.pdfocr.onnxtr.detection.IDetectionPredictor;
3334
import com.itextpdf.pdfocr.onnxtr.detection.OnnxDetectionPredictor;
35+
import com.itextpdf.pdfocr.onnxtr.exceptions.PdfOcrOnnxTrExceptionMessageConstant;
3436
import com.itextpdf.pdfocr.onnxtr.orientation.IOrientationPredictor;
3537
import com.itextpdf.pdfocr.onnxtr.orientation.OnnxOrientationPredictor;
3638
import com.itextpdf.pdfocr.onnxtr.recognition.IRecognitionPredictor;
@@ -83,16 +85,19 @@ public void rainbowInvertedCmykTest() throws IOException {
8385
String dest = TARGET_DIRECTORY + "rainbowInvertedCmykTest.pdf";
8486
String cmpTxt = TEST_DIRECTORY + "cmp_rainbowInvertedCmykTest.txt";
8587

86-
if (isFixedInJdk()) {
88+
try {
8789
doOcrAndCreatePdf(src, dest, creatorProperties("Text1", DeviceCmyk.MAGENTA));
8890
try (PdfDocument pdfDocument = new PdfDocument(new PdfReader(dest))) {
8991
ExtractionStrategy extractionStrategy = OnnxTestUtils.extractTextFromLayer(pdfDocument, 1, "Text1");
9092
Assertions.assertEquals(DeviceCmyk.MAGENTA, extractionStrategy.getFillColor());
9193
Assertions.assertEquals(getCmpText(cmpTxt), extractionStrategy.getResultantText());
9294
}
93-
} else {
94-
Exception e = Assertions.assertThrows(Exception.class, () -> doOcrAndCreatePdf(src, dest, null));
95-
Assertions.assertEquals("Failed to read image.", e.getMessage());
95+
} catch (PdfOcrInputException e) {
96+
// CMYK bug https://bugs.openjdk.org/browse/JDK-8274735 in openJDK:
97+
// fixed in JDK 8 from update 351 onwards, in JDK 11 from 11.0.16 onwards and in JDK 17 from 17.0.4 onwards.
98+
// Amazon Corretto JDK started supporting CMYK for JPEG from version 11.
99+
// Temurin 8 does not support CMYK for JPEG either.
100+
Assertions.assertEquals(PdfOcrOnnxTrExceptionMessageConstant.FAILED_TO_READ_IMAGE, e.getMessage());
96101
}
97102
}
98103

@@ -102,7 +107,7 @@ public void rainbowAdobeCmykTest() throws IOException {
102107
String dest = TARGET_DIRECTORY + "rainbowAdobeCmykTest.pdf";
103108
String cmpTxt = TEST_DIRECTORY + "cmp_rainbowAdobeCmykTest.txt";
104109

105-
if (isFixedInJdk()) {
110+
try {
106111
doOcrAndCreatePdf(src, dest, creatorProperties("Text1", DeviceCmyk.MAGENTA));
107112
try (PdfDocument pdfDocument = new PdfDocument(new PdfReader(dest))) {
108113
ExtractionStrategy extractionStrategy = OnnxTestUtils.extractTextFromLayer(pdfDocument, 1, "Text1");
@@ -111,9 +116,12 @@ public void rainbowAdobeCmykTest() throws IOException {
111116
extractionStrategy.getResultantText()) / getCmpText(cmpTxt).length();
112117
Assertions.assertTrue(relativeDistance < 0.05);
113118
}
114-
} else {
115-
Exception e = Assertions.assertThrows(Exception.class, () -> doOcrAndCreatePdf(src, dest, null));
116-
Assertions.assertEquals("Failed to read image.", e.getMessage());
119+
} catch (PdfOcrInputException e) {
120+
// CMYK bug https://bugs.openjdk.org/browse/JDK-8274735 in openJDK:
121+
// fixed in JDK 8 from update 351 onwards, in JDK 11 from 11.0.16 onwards and in JDK 17 from 17.0.4 onwards.
122+
// Amazon Corretto JDK started supporting CMYK for JPEG from version 11.
123+
// Temurin 8 does not support CMYK for JPEG either.
124+
Assertions.assertEquals(PdfOcrOnnxTrExceptionMessageConstant.FAILED_TO_READ_IMAGE, e.getMessage());
117125
}
118126
}
119127

@@ -123,64 +131,22 @@ public void rainbowCmykNoProfileTest() throws IOException {
123131
String dest = TARGET_DIRECTORY + "rainbowCmykNoProfileTest.pdf";
124132
String cmpTxt = TEST_DIRECTORY + "cmp_rainbowCmykNoProfileTest.txt";
125133

126-
if (isFixedInJdk()) {
134+
try {
127135
doOcrAndCreatePdf(src, dest, creatorProperties("Text1", DeviceCmyk.MAGENTA));
128136
try (PdfDocument pdfDocument = new PdfDocument(new PdfReader(dest))) {
129137
ExtractionStrategy extractionStrategy = OnnxTestUtils.extractTextFromLayer(pdfDocument, 1, "Text1");
130138
Assertions.assertEquals(DeviceCmyk.MAGENTA, extractionStrategy.getFillColor());
131139
Assertions.assertEquals(getCmpText(cmpTxt), extractionStrategy.getResultantText());
132140
}
133-
} else {
134-
Exception e = Assertions.assertThrows(Exception.class, () -> doOcrAndCreatePdf(src, dest, null));
135-
Assertions.assertEquals("Failed to read image.", e.getMessage());
141+
} catch (PdfOcrInputException e) {
142+
// CMYK bug https://bugs.openjdk.org/browse/JDK-8274735 in openJDK:
143+
// fixed for jdk8 from 351 onwards, for jdk11 from 16 onwards and for jdk17 starting from 4.
144+
// Amazon corretto jdk started support CMYK for JPEG from 11 version.
145+
// Temurin 8 does not support CMYK for JPEG either.
146+
Assertions.assertEquals(PdfOcrOnnxTrExceptionMessageConstant.FAILED_TO_READ_IMAGE, e.getMessage());
136147
}
137148
}
138149

139-
private static boolean isFixedInJdk() {
140-
//Fixed CMYK bug https://bugs.openjdk.org/browse/JDK-8274735 for openJDK:
141-
//jdk8 from 351 onwards, for jdk11 from 16 onwards and for jdk17 starting from 4.
142-
//Amazon corretto jdk started support CMYK for JPEG from 11 version.
143-
//Temurin 8 does not support CMYK for JPEG either.
144-
String versionStr = System.getProperty("java.version");
145-
String vendorStr = System.getProperty("java.vendor");
146-
boolean isFixed = false;
147-
int majorVer = getMajorVer(versionStr);
148-
String[] split = versionStr.split("[._-]");
149-
int minorVer = Integer.parseInt(split[split.length - 1]);
150-
151-
switch (majorVer) {
152-
case 8:
153-
if ("Amazon.com Inc.".equals(vendorStr) || "Temurin".equals(vendorStr)) {
154-
return false;
155-
}
156-
157-
isFixed = minorVer >= 351;
158-
break;
159-
case 11:
160-
isFixed = minorVer >= 16;
161-
break;
162-
case 17:
163-
isFixed = minorVer >= 4;
164-
break;
165-
default:
166-
isFixed = true;
167-
}
168-
169-
return isFixed;
170-
}
171-
172-
private static int getMajorVer(String versionStr) {
173-
int majorVer = 0;
174-
String[] split = versionStr.split("\\.");
175-
if (versionStr.startsWith("1.")) {
176-
//jdk versions 1 - 8 have 1. as prefix
177-
majorVer = Integer.parseInt(split[1]);
178-
} else {
179-
majorVer = Integer.parseInt(split[0]);
180-
}
181-
return majorVer;
182-
}
183-
184150
private OcrPdfCreatorProperties creatorProperties(String layerName, Color color) {
185151
OcrPdfCreatorProperties ocrPdfCreatorProperties = new OcrPdfCreatorProperties();
186152
ocrPdfCreatorProperties.setTextLayerName(layerName);

pdfocr-tesseract4/pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
<parent>
66
<groupId>com.itextpdf</groupId>
77
<artifactId>pdfocr-root</artifactId>
8-
<version>4.1.1</version>
8+
<version>4.1.2</version>
99
</parent>
1010

1111
<artifactId>pdfocr-tesseract4</artifactId>

pdfocr-tesseract4/src/main/java/com/itextpdf/pdfocr/tesseract4/actions/data/PdfOcrTesseract4ProductData.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ This file is part of the iText (R) project.
3030
public class PdfOcrTesseract4ProductData {
3131
private static final String PDF_OCR_TESSERACT4_PRODUCT_NAME = "pdfOcr-tesseract4";
3232
private static final String PDF_OCR_TESSERACT4_PUBLIC_PRODUCT_NAME = "pdfOCR-Tesseract4";
33-
private static final String PDF_OCR_VERSION = "4.1.1";
33+
private static final String PDF_OCR_VERSION = "4.1.2";
3434
private static final int PDF_OCR_COPYRIGHT_SINCE = 2000;
3535
private static final int PDF_OCR_COPYRIGHT_TO = 2025;
3636

pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/tesseract4/ImageIntegrationTest.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ This file is part of the iText (R) project.
2727
import com.itextpdf.pdfocr.TextInfo;
2828
import org.junit.jupiter.api.Assertions;
2929
import org.junit.jupiter.api.BeforeEach;
30+
import org.junit.jupiter.api.Disabled;
3031
import org.junit.jupiter.api.Test;
3132
import org.slf4j.Logger;
3233
import org.slf4j.LoggerFactory;
@@ -88,6 +89,7 @@ public void testHocrRotatedImage() throws IOException {
8889
}
8990

9091
@Test
92+
@Disabled("DEVSIX-9261 Investigate test failures on Windows Server 2025 and Windows 11")
9193
public void compareRotatedImage() throws InterruptedException, IOException {
9294
String testName = "compareRotatedImage";
9395
String filename = "90_degrees_rotated";
@@ -109,7 +111,7 @@ public void compareRotatedImage() throws InterruptedException, IOException {
109111
Arrays.<String>asList("eng"), Arrays.<String>asList(NOTO_SANS_FONT_PATH),
110112
null, true);
111113

112-
// Because of difference of tesseract 5 and tesseract 4 there're some differences in text recognition.
114+
// Because of differences between Tesseract 5 and Tesseract 4, there are some differences in text recognition.
113115
// So the goal of this test is to make text invisible and check if image is rotated.
114116
// Proper text recognition is compared in testHocrRotatedImage test by checking HOCR file.
115117
boolean javaTest = new CompareTool().compareVisually(resultPdfPath, expectedPdfPathJava,

pom.xml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,12 @@
55
<parent>
66
<groupId>com.itextpdf</groupId>
77
<artifactId>root</artifactId>
8-
<version>9.4.0</version>
8+
<version>9.5.0</version>
99
<relativePath />
1010
</parent>
1111

1212
<artifactId>pdfocr-root</artifactId>
13-
<version>4.1.1</version>
13+
<version>4.1.2</version>
1414
<packaging>pom</packaging>
1515

1616
<name>pdfOCR</name>
@@ -23,7 +23,7 @@
2323
</modules>
2424

2525
<properties>
26-
<itext.version>9.4.0</itext.version>
26+
<itext.version>9.5.0</itext.version>
2727
<java.version>1.8</java.version>
2828
<jdkLevel>${java.version}</jdkLevel>
2929
<maven.compiler.source>${java.version}</maven.compiler.source>

sharpenConfiguration.xml

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,6 @@
2121
<file path="com/itextpdf/pdfocr/onnxtr/OnnxTRPdfAIntegrationTest.java" />
2222
<file path="com/itextpdf/pdfocr/onnxtr/OnnxTRRotationIntegrationTest.java" />
2323
</fileset>
24-
<!-- TODO DEVSIX-9305: fileset should be removed and OnnxTRCmykIntegrationTest made autoportable-->
25-
<fileset reason="Bad handling of JPEG CMYK in java versions without JDK-8274735 fix">
26-
<file path="com/itextpdf/pdfocr/onnxtr/OnnxTRCmykIntegrationTest.java" />
27-
</fileset>
2824
<fileset reason="Difference in Iterators in java and enumerators in .net">
2925
<file path="com/itextpdf/pdfocr/onnxtr/util/Batching.java" />
3026
<file path="com/itextpdf/pdfocr/onnxtr/util/BatchProcessingGeneratorTest.java" />

0 commit comments

Comments
 (0)