Skip to content

Commit 47e688d

Browse files
committed
PDFBOX-5961: support 3 / 4 byte input values
git-svn-id: https://svn.apache.org/repos/asf/pdfbox/trunk@1923977 13f79535-47bb-0310-9956-ffa450edef68
1 parent 6652b7a commit 47e688d

File tree

1 file changed

+37
-5
lines changed
  • fontbox/src/main/java/org/apache/fontbox/cmap

1 file changed

+37
-5
lines changed

fontbox/src/main/java/org/apache/fontbox/cmap/CMap.java

Lines changed: 37 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -57,6 +57,8 @@ public class CMap
5757
private final Map<Integer, String> charToUnicodeOneByte = new HashMap<>();
5858
// two byte input values
5959
private final Map<Integer, String> charToUnicodeTwoBytes = new HashMap<>();
60+
// 3 / 4 byte input values
61+
private final Map<Integer, String> charToUnicodeMoreBytes = new HashMap<>();
6062

6163
// CID mappings
6264
// map with all code to cid mappings organized by the origin byte length of the input value
@@ -93,7 +95,7 @@ public boolean hasCIDMappings()
9395
*/
9496
public boolean hasUnicodeMappings()
9597
{
96-
return !charToUnicodeOneByte.isEmpty() || !charToUnicodeTwoBytes.isEmpty();
98+
return !charToUnicodeOneByte.isEmpty() || !charToUnicodeTwoBytes.isEmpty() || !charToUnicodeMoreBytes.isEmpty();
9799
}
98100

99101
/**
@@ -113,7 +115,15 @@ public String toUnicode(int code)
113115
String unicode = code < 256 ? toUnicode(code, 1) : null;
114116
if (unicode == null)
115117
{
116-
unicode = toUnicode(code, 2);
118+
if (code <= 0xFFFF)
119+
{
120+
return toUnicode(code, 2);
121+
}
122+
if (code <= 0xFFFFFF)
123+
{
124+
return toUnicode(code, 3);
125+
}
126+
return toUnicode(code, 4);
117127
}
118128
return unicode;
119129
}
@@ -135,8 +145,7 @@ public String toUnicode(int code, int length)
135145
{
136146
return charToUnicodeTwoBytes.get(code);
137147
}
138-
LOG.warn("Mappings with more than 2 bytes aren't supported");
139-
return null;
148+
return charToUnicodeMoreBytes.get(code);
140149
}
141150

142151
/**
@@ -350,9 +359,14 @@ else if (codes.length == 2)
350359
charToUnicodeTwoBytes.put(CMapStrings.getIndexValue(codes), unicode);
351360
unicodeToByteCodes.put(unicode, CMapStrings.getByteValue(codes));
352361
}
362+
else if (codes.length == 3 || codes.length == 4)
363+
{
364+
charToUnicodeMoreBytes.put(toInt(codes), unicode);
365+
unicodeToByteCodes.put(unicode, codes.clone());
366+
}
353367
else
354368
{
355-
LOG.warn("Mappings with more than 2 bytes aren't supported yet");
369+
LOG.warn("Mappings with more than 4 bytes (here: {}) aren't supported yet", codes.length);
356370
}
357371
// fixme: ugly little hack
358372
if (SPACE.equals(unicode))
@@ -442,10 +456,28 @@ void useCmap(CMap cmap)
442456
cmap.codespaceRanges.forEach(this::addCodespaceRange);
443457
charToUnicodeOneByte.putAll(cmap.charToUnicodeOneByte);
444458
charToUnicodeTwoBytes.putAll(cmap.charToUnicodeTwoBytes);
459+
charToUnicodeMoreBytes.putAll(cmap.charToUnicodeMoreBytes);
445460
cmap.charToUnicodeOneByte.forEach((k, v) -> unicodeToByteCodes.put(v, new byte[]{(byte) (k % 0xFF)}));
446461
cmap.charToUnicodeTwoBytes.forEach((k, v) -> unicodeToByteCodes.put(v,
447462
new byte[]{(byte) ((k >>> 8) & 0xFF), (byte) (k & 0xFF)})
448463
);
464+
cmap.charToUnicodeMoreBytes.forEach((k, v) ->
465+
{
466+
byte[] bar;
467+
if (k <= 0xFFFFFF)
468+
{
469+
// 3 bytes
470+
bar = new byte[]{(byte) ((k >>> 16) & 0xFF), (byte) ((k >>> 8) & 0xFF),
471+
(byte) (k & 0xFF)};
472+
}
473+
else
474+
{
475+
// 4 bytes
476+
bar = new byte[]{(byte) ((k >>> 24) & 0xFF), (byte) ((k >>> 16) & 0xFF),
477+
(byte) ((k >>> 8) & 0xFF), (byte) (k & 0xFF)};
478+
}
479+
unicodeToByteCodes.put(v, bar);
480+
});
449481
cmap.codeToCid.forEach((key, value) ->
450482
{
451483
Map<Integer, Integer> existingMapping = codeToCid.putIfAbsent(key, value);

0 commit comments

Comments
 (0)