|
| 1 | +/* |
| 2 | + This file is part of the iText (R) project. |
| 3 | + Copyright (c) 1998-2022 iText Group NV |
| 4 | + Authors: iText Software. |
| 5 | +
|
| 6 | + This program is free software; you can redistribute it and/or modify |
| 7 | + it under the terms of the GNU Affero General Public License version 3 |
| 8 | + as published by the Free Software Foundation with the addition of the |
| 9 | + following permission added to Section 15 as permitted in Section 7(a): |
| 10 | + FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY |
| 11 | + ITEXT GROUP. ITEXT GROUP DISCLAIMS THE WARRANTY OF NON INFRINGEMENT |
| 12 | + OF THIRD PARTY RIGHTS |
| 13 | +
|
| 14 | + This program is distributed in the hope that it will be useful, but |
| 15 | + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
| 16 | + or FITNESS FOR A PARTICULAR PURPOSE. |
| 17 | + See the GNU Affero General Public License for more details. |
| 18 | + You should have received a copy of the GNU Affero General Public License |
| 19 | + along with this program; if not, see http://www.gnu.org/licenses or write to |
| 20 | + the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
| 21 | + Boston, MA, 02110-1301 USA, or download the license from the following URL: |
| 22 | + http://itextpdf.com/terms-of-use/ |
| 23 | +
|
| 24 | + The interactive user interfaces in modified source and object code versions |
| 25 | + of this program must display Appropriate Legal Notices, as required under |
| 26 | + Section 5 of the GNU Affero General Public License. |
| 27 | +
|
| 28 | + In accordance with Section 7(b) of the GNU Affero General Public License, |
| 29 | + a covered work must retain the producer line in every PDF that is created |
| 30 | + or manipulated using iText. |
| 31 | +
|
| 32 | + You can be released from the requirements of the license by purchasing |
| 33 | + a commercial license. Buying such a license is mandatory as soon as you |
| 34 | + develop commercial activities involving the iText software without |
| 35 | + disclosing the source code of your own applications. |
| 36 | + These activities include: offering paid services to customers as an ASP, |
| 37 | + serving PDFs on the fly in a web application, shipping iText with a closed |
| 38 | + source product. |
| 39 | +
|
| 40 | + For more information, please contact iText Software Corp. at this |
| 41 | + address: sales@itextpdf.com |
| 42 | + */ |
| 43 | +package com.itextpdf.rups.view.itext.contentstream; |
| 44 | + |
| 45 | +import com.itextpdf.io.font.PdfEncodings; |
| 46 | +import com.itextpdf.test.annotations.type.UnitTest; |
| 47 | +import org.junit.Assert; |
| 48 | +import org.junit.Test; |
| 49 | +import org.junit.experimental.categories.Category; |
| 50 | +import org.junit.runner.RunWith; |
| 51 | +import org.junit.runners.Parameterized; |
| 52 | + |
| 53 | +import java.nio.charset.StandardCharsets; |
| 54 | +import java.util.ArrayList; |
| 55 | +import java.util.Collection; |
| 56 | + |
| 57 | +@RunWith(Parameterized.class) |
| 58 | +@Category(UnitTest.class) |
| 59 | +public class PdfDocEncodingHeuristicTest { |
| 60 | + @Parameterized.Parameters |
| 61 | + public static Collection<Object[]> data() { |
| 62 | + Collection<Object[]> cases = new ArrayList<>(); |
| 63 | + String[] positiveStrings = new String[] { |
| 64 | + "abccadslk fjds", |
| 65 | + "abccadslk\tfjds", |
| 66 | + "abccadslk\nfjds", |
| 67 | + "abccadslk\rfjds", |
| 68 | + "/+xy1209837a$^!@$#&#*!&dksjfao7210", |
| 69 | + "/+xy120921312½", |
| 70 | + "en_US", "en-US", |
| 71 | + |
| 72 | + "© iText Software", |
| 73 | + "Bär" |
| 74 | + }; |
| 75 | + |
| 76 | + byte[][] positiveBytes = new byte[][] { |
| 77 | + new byte[] { 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x68, 0x65, 0x6c, 0x6c, 0x6f }, |
| 78 | + new byte[] { 0x68, 0x65, 0x6c, 0x6c, 0x6f, (byte) 0x92, 0x68, 0x65, 0x6c, 0x6c, 0x6f } |
| 79 | + }; |
| 80 | + |
| 81 | + String[] negativeStrings = new String[] { |
| 82 | + "©z®z", |
| 83 | + "/+xy2½", |
| 84 | + "abccadslk\ffjds", // linefeed is whitespace, but undefined in PDFDocEncoding |
| 85 | + "Hello\007world" // non-whitespace control character |
| 86 | + }; |
| 87 | + |
| 88 | + byte[][] negativeBytes = new byte[][] { |
| 89 | + // utf8 rendering of ä doesn't represent a letter in PDFDocEncoding |
| 90 | + "Bär".getBytes(StandardCharsets.UTF_8), |
| 91 | + // proportion of non-letter bytes too high |
| 92 | + new byte[] { 0x68, 0x65, 0x6c, 0x6c, 0x6f, (byte) 0x92}, |
| 93 | + // no non-letter bytes at all |
| 94 | + new byte[] { 0x01, 0x02, 0x03, 0x04 }, |
| 95 | + // contains control character that isn't whitespace |
| 96 | + new byte[] { 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x01, 0x68, 0x65, 0x6c, 0x6c, 0x6f } |
| 97 | + }; |
| 98 | + |
| 99 | + for(String s : positiveStrings) { |
| 100 | + cases.add(new Object[] { |
| 101 | + PdfEncodings.convertToBytes(s, PdfEncodings.PDF_DOC_ENCODING), |
| 102 | + true |
| 103 | + }); |
| 104 | + } |
| 105 | + |
| 106 | + for(byte[] b : positiveBytes) { |
| 107 | + cases.add(new Object[] {b, true}); |
| 108 | + } |
| 109 | + |
| 110 | + for(String s : negativeStrings) { |
| 111 | + cases.add(new Object[] { |
| 112 | + PdfEncodings.convertToBytes(s, PdfEncodings.PDF_DOC_ENCODING), |
| 113 | + false |
| 114 | + }); |
| 115 | + } |
| 116 | + |
| 117 | + for(byte[] b : negativeBytes) { |
| 118 | + cases.add(new Object[] {b, false}); |
| 119 | + } |
| 120 | + |
| 121 | + return cases; |
| 122 | + } |
| 123 | + |
| 124 | + private final byte[] encoded; |
| 125 | + private final boolean textExpected; |
| 126 | + |
| 127 | + public PdfDocEncodingHeuristicTest(byte[] encoded, boolean textExpected) { |
| 128 | + this.encoded = encoded; |
| 129 | + this.textExpected = textExpected; |
| 130 | + } |
| 131 | + |
| 132 | + @Test |
| 133 | + public void testPdfDocTextHeuristic() { |
| 134 | + boolean result = ContentStreamHandlingUtils.isMaybePdfDocEncodedText(this.encoded); |
| 135 | + |
| 136 | + String asPdfDoc = PdfEncodings.convertToString(this.encoded, PdfEncodings.PDF_DOC_ENCODING); |
| 137 | + Assert.assertEquals(asPdfDoc, this.textExpected, result); |
| 138 | + } |
| 139 | + |
| 140 | +} |
0 commit comments