Skip to content

Commit 9379a19

Browse files
committed
Search encoding differences when building a Type 3 font
DEVSIX-2773
1 parent 9f61901 commit 9379a19

File tree

4 files changed

+108
-3
lines changed

4 files changed

+108
-3
lines changed

kernel/src/main/java/com/itextpdf/kernel/font/PdfType3Font.java

Lines changed: 30 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -64,6 +64,8 @@ This file is part of the iText (R) project.
6464
import org.slf4j.Logger;
6565
import org.slf4j.LoggerFactory;
6666

67+
import java.util.Map;
68+
6769
/**
6870
* Low-level API class for Type 3 fonts.
6971
* <p>
@@ -141,13 +143,38 @@ public class PdfType3Font extends PdfSimpleFont<Type3Font> {
141143
}
142144
setFontMatrix(fontMatrix);
143145

146+
if (toUnicode != null && toUnicode.hasByteMappings() && fontEncoding.hasDifferences()) {
147+
for (int i = 0; i < 256; i++) {
148+
int unicode = fontEncoding.getUnicode(i);
149+
PdfName glyphName = new PdfName(fontEncoding.getDifference(i));
150+
if (unicode != -1
151+
&& !glyphName.getValue().equals(FontEncoding.NOTDEF)
152+
&& charProcsDic.containsKey(glyphName)) {
153+
((Type3Font) getFontProgram()).addGlyph(i, unicode, widths[i], null, new Type3Glyph(charProcsDic.getAsStream(glyphName), getDocument()));
154+
}
155+
}
156+
}
157+
158+
159+
Map<Integer, Integer> unicodeToCode = null;
160+
if (toUnicode != null) {
161+
try { unicodeToCode = toUnicode.createReverseMapping(); } catch (Exception ignored){}
162+
}
163+
144164
for (PdfName glyphName : charProcsDic.keySet()) {
145165
int unicode = AdobeGlyphList.nameToUnicode(glyphName.getValue());
146-
if (unicode != -1 && fontEncoding.canEncode(unicode)) {
147-
int code = fontEncoding.convertToByte(unicode);
148-
((Type3Font) getFontProgram()).addGlyph(code, unicode, widths[code], null, new Type3Glyph(charProcsDic.getAsStream(glyphName), getDocument()));
166+
int code = -1;
167+
if (fontEncoding.canEncode(unicode)) {
168+
code = fontEncoding.convertToByte(unicode);
169+
} else if (unicodeToCode != null && unicodeToCode.containsKey(unicode)) {
170+
code = (int) unicodeToCode.get(unicode);
171+
}
172+
if (code != -1 && getFontProgram().getGlyphByCode(code) == null) {
173+
((Type3Font) getFontProgram()).addGlyph(code, unicode, widths[code],
174+
null, new Type3Glyph(charProcsDic.getAsStream(glyphName), getDocument()));
149175
}
150176
}
177+
151178
fillFontDescriptor(fontDictionary.getAsDictionary(PdfName.FontDescriptor));
152179
}
153180

layout/src/test/java/com/itextpdf/layout/LocationTextExtractionStrategyTest.java

Lines changed: 27 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -46,10 +46,12 @@ This file is part of the iText (R) project.
4646
import com.itextpdf.kernel.colors.ColorConstants;
4747
import com.itextpdf.kernel.font.PdfFont;
4848
import com.itextpdf.kernel.font.PdfFontFactory;
49+
import com.itextpdf.kernel.font.PdfType3Font;
4950
import com.itextpdf.kernel.geom.AffineTransform;
5051
import com.itextpdf.kernel.geom.PageSize;
5152
import com.itextpdf.kernel.geom.Rectangle;
5253
import com.itextpdf.kernel.geom.Vector;
54+
import com.itextpdf.kernel.pdf.PdfDictionary;
5355
import com.itextpdf.kernel.pdf.PdfDocument;
5456
import com.itextpdf.kernel.pdf.PdfPage;
5557
import com.itextpdf.kernel.pdf.PdfReader;
@@ -71,6 +73,7 @@ This file is part of the iText (R) project.
7173
import java.io.ByteArrayInputStream;
7274
import java.io.ByteArrayOutputStream;
7375
import java.io.IOException;
76+
import java.nio.charset.StandardCharsets;
7477

7578
import org.junit.Assert;
7679
import org.junit.Test;
@@ -220,6 +223,30 @@ public void testLittleFontSize() throws Exception {
220223
Assert.assertEquals("Preface", text);
221224
}
222225

226+
@Test
227+
public void testType3FontWithDifferences() throws IOException {
228+
String sourcePdf = sourceFolder + "DocumentWithType3FontWithDifferences.pdf";
229+
String comparedTextFile = sourceFolder + "textFromDocWithType3FontWithDifferences.txt";
230+
231+
PdfDocument pdf = new PdfDocument(new PdfReader(sourcePdf));
232+
String result = PdfTextExtractor.getTextFromPage(pdf.getPage(1), new LocationTextExtractionStrategy());
233+
234+
PdfDictionary pdfType3FontDict = (PdfDictionary) pdf.getPdfObject(292);
235+
PdfType3Font pdfType3Font = (PdfType3Font) PdfFontFactory.createFont(pdfType3FontDict);
236+
237+
pdf.close();
238+
239+
Assert.assertEquals(new String(java.nio.file.Files.readAllBytes(new java.io.File(comparedTextFile).toPath()), StandardCharsets.UTF_8), result);
240+
241+
Assert.assertEquals(83, pdfType3Font.getNumberOfGlyphs());
242+
243+
Assert.assertEquals("gA", pdfType3Font.getFontEncoding().getDifference(10));
244+
Assert.assertEquals(41, pdfType3Font.getFontProgram().getGlyphByCode(10).getUnicode());
245+
246+
Assert.assertEquals(".notdef", pdfType3Font.getFontEncoding().getDifference(210));
247+
Assert.assertEquals(928, pdfType3Font.getFontProgram().getGlyphByCode(210).getUnicode());
248+
}
249+
223250
private byte[] createPdfWithNegativeCharSpacing(String str1, float charSpacing, String str2) throws Exception {
224251
ByteArrayOutputStream baos = new ByteArrayOutputStream();
225252
PdfDocument pdfDocument = new PdfDocument(new PdfWriter(baos).setCompressionLevel(0));
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,51 @@
1+
11/27/2018 Pravilnik o zaštiti radnika od izloženosti opasnim kemikalijama na radu, graničnim vrijednostima izloženosti i biološkim graničnim vrijednostima
2+
OZNAKE U TABLICI
3+
– EZ broj – EINECS, ELINCS ili NLP slu žbeni je broj tvari u Europskoj uniji, kako je utvrđeno pod točkom 1.1.1.2., dio 1.
4+
Priloga VI. Uredbe (EZ) br. 1272/2008
5+
– CAS broj – registarski broj prema Međunarodnom popisu kemijskih tvari (engl. Chemical Abstract Service)
6+
Oznake za kolone »GVI« i »KGVI«:
7+
3 3 3
8+
– ppm – dijelova na milijun u volumenu zraka (ml/m i cm /m )
9+
3
10+
– mg/m – miligrami po kubičnom metru zraka pri 20 °C i 101,3 kPa (760 mm tlaka žive)
11+
3 3
12+
– vl/cm – broj vlakana po kubičnom centimetru (posebno je označeno ispod broja u koloni za »mg/m «)
13+
3
14+
– U – ukupna pra šina, inhalabilne čestice (sve vrijednosti u kolonama za »mg/m « odnose se na ukupnu pra šinu,
15+
posebno se označava samo u kombinaciji s oznakama za R-respirabilnu pra šinu)
16+
– R – respirabilna pra šina, frakcija koja udisanjem može doprijeti u pluća
17+
Oznake za kolonu »Direktiva«:
18+
– sve oznake odnose se na brojeve iz direktiva navedenih u članku 2. ovoga Pravilnika
19+
Oznake za kolonu »Napomena«:
20+
– odnose se na tvari koje se nalaze na popisu usklađenog razvrstavanja i označavanja u Prilogu VI. Uredbe (EZ) br.
21+
1272/2008, a navedene su samo sljedeći razredi i kategorije:
22+
– Karc 1A ili 1B
23+
– tvar koja je prema Uredbi (EZ) br. 1272/2008 razvrstana kao karcinogena 1.A ili 1.B kategorije
24+
– Muta 1A ili 1B
25+
– tvar koja je prema Uredbi (EZ) br. 1272/2008 razvrstana kao mutagena 1.A ili 1.B kategorije
26+
– Repr 1A ili 1B
27+
– tvar koja je prema Uredbi (EZ) br. 1272/2008 razvrstana kao reproduktivno toksična 1.A ili 1.B kategorije.
28+
– koža
29+
– razvrstana kao tvar koja nadra žuje kožu (H315)
30+
– ili je takva napomena navedena u direktivama
31+
– alergen koža
32+
– tvar koja može izazvati alergijsku reakciju na koži (H317)
33+
– alergen udisanjem
34+
– tvar koja udisanjem može izazvati simptome alergije ili astme ili poteškoće s disanjem (H334)
35+
GVI KGVI
36+
CAS EZ
37+
IME TVARI Direktiva Napomena
38+
broj broj
39+
ppm mg/m³ ppm mg/m³
40+
                 
41+
75-07-0 200-836-8 Acetaldehid 20 37 50 92
42+
108-24-7 203564-8 Acetanhidrid 0,5 2,5 2 10
43+
50-78-2 200-064-1 o-Acetil-salicilna kiselina 5
44+
67-64-1 200-662-2 Aceton 500 1210 2000/39/EZ
45+
Acetonitril;
46+
75-05-8 200-835-2 40 70 2006/15/EZ koža
47+
cijanometan
48+
koža, alergen koža,
49+
79-06-1 201-173-7 Akrilamid 0,3 2017/2398 Karc 1B,
50+
Muta 1B
51+
https://narodne-novine.nn.hr/clanci/sluzbeni/2018_10_91_1774.html 14/42

0 commit comments

Comments
 (0)