Skip to content

Commit 4d95e03

Browse files
committed
PDFBOX-5902: cache CMap-string-mappings to avoid multiple instances of the very same string
git-svn-id: https://svn.apache.org/repos/asf/pdfbox/trunk@1922530 13f79535-47bb-0310-9956-ffa450edef68
1 parent cf4ffdf commit 4d95e03

File tree

2 files changed

+82
-1
lines changed

2 files changed

+82
-1
lines changed

fontbox/src/main/java/org/apache/fontbox/cmap/CMapParser.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -814,7 +814,11 @@ private static boolean increment(byte[] data, int position, boolean useStrictMod
814814

815815
private static String createStringFromBytes(byte[] bytes)
816816
{
817-
return new String(bytes, bytes.length == 1 ? StandardCharsets.ISO_8859_1 : StandardCharsets.UTF_16BE);
817+
if (bytes.length <= 2)
818+
{
819+
return CMapStrings.getMapping(bytes);
820+
}
821+
return new String(bytes, StandardCharsets.UTF_16BE);
818822
}
819823

820824
/**
fontbox/src/main/java/org/apache/fontbox/cmap/CMapStrings.java

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* https://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.fontbox.cmap;
19+
20+
import java.nio.charset.StandardCharsets;
21+
import java.util.HashMap;
22+
import java.util.Map;
23+
24+
/**
 * Many CMaps are using the same values for the mapped strings. This class provides all common one- and two-byte
 * mappings to avoid duplicate strings.
 * <p>
 * Both lookup tables are created once during class initialization and are only read afterwards.
 */
public final class CMapStrings
{
    /** Number of distinct one-byte values. */
    private static final int ONE_BYTE_COUNT = 1 << 8;

    /** Number of distinct two-byte values. */
    private static final int TWO_BYTE_COUNT = 1 << 16;

    // create all mappings when loading the class to avoid concurrency issues
    private static final String[] ONE_BYTE_MAPPINGS = createOneByteMappings();
    private static final String[] TWO_BYTE_MAPPINGS = createTwoByteMappings();

    private CMapStrings()
    {
    }

    /**
     * Builds the table of all one-byte mappings; the entry at index {@code i} is the ISO-8859-1 decoding of
     * the single byte with the unsigned value {@code i}.
     */
    private static String[] createOneByteMappings()
    {
        String[] mappings = new String[ONE_BYTE_COUNT];
        for (int code = 0; code < ONE_BYTE_COUNT; code++)
        {
            byte[] bytes = { (byte) code };
            mappings[code] = new String(bytes, StandardCharsets.ISO_8859_1);
        }
        return mappings;
    }

    /**
     * Builds the table of all two-byte mappings; the entry at index {@code (high << 8) | low} is the UTF-16BE
     * decoding of the byte pair {@code high, low}.
     */
    private static String[] createTwoByteMappings()
    {
        String[] mappings = new String[TWO_BYTE_COUNT];
        for (int code = 0; code < TWO_BYTE_COUNT; code++)
        {
            byte[] bytes = { (byte) (code >> 8), (byte) code };
            mappings[code] = new String(bytes, StandardCharsets.UTF_16BE);
        }
        return mappings;
    }

    /**
     * Get the mapped string value for the given combination of bytes. The mapping is limited to one and two-byte
     * mappings. Any longer byte sequence produces null as return value. An empty byte sequence produces an
     * empty string.
     *
     * @param bytes the given combination of bytes
     * @return the string representation for the given combination of bytes, or null if more than two bytes
     * are given
     */
    public static String getMapping(byte[] bytes)
    {
        switch (bytes.length)
        {
        case 0:
            // no bytes, no characters: must not fall through to the two-byte lookup
            return "";
        case 1:
            // mask to get the unsigned value of the byte as table index
            return ONE_BYTE_MAPPINGS[bytes[0] & 0xFF];
        case 2:
            return TWO_BYTE_MAPPINGS[((bytes[0] & 0xFF) << 8) | (bytes[1] & 0xFF)];
        default:
            return null;
        }
    }
}

0 commit comments

Comments
 (0)