Skip to content

Commit bcc8f6c

Browse files
committed
PDFBOX-5997: avoid creation of temporary objects as proposed by Axel Howind
git-svn-id: https://svn.apache.org/repos/asf/pdfbox/trunk@1926545 13f79535-47bb-0310-9956-ffa450edef68
1 parent 28d4312 commit bcc8f6c

File tree

2 files changed

+76
-25
lines changed

2 files changed

+76
-25
lines changed

pdfbox/src/main/java/org/apache/pdfbox/cos/COSString.java

Lines changed: 43 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -135,36 +135,60 @@ public COSString(String text, boolean forceHex)
135135
*/
136136
public static COSString parseHex(String hex) throws IOException
137137
{
138-
StringBuilder hexBuffer = new StringBuilder(hex.trim());
139-
140-
// if odd number then the last hex digit is assumed to be 0
141-
if (hexBuffer.length() % 2 != 0)
138+
// skip leading and trailing whitespace
139+
int end = hex.length();
140+
while (end > 0 && Character.isWhitespace(hex.charAt(end - 1)))
141+
{
142+
end--;
143+
}
144+
int start = 0;
145+
while (start < end && Character.isWhitespace(hex.charAt(start)))
142146
{
143-
hexBuffer.append('0');
147+
start++;
144148
}
145149

146-
int length = hexBuffer.length();
150+
int length = end - start;
147151
ByteArrayOutputStream bytes = new ByteArrayOutputStream((length + 1) / 2);
152+
153+
boolean isLengthUneven = length % 2 != 0;
154+
if (isLengthUneven)
155+
{
156+
length--;
157+
}
148158
for (int i = 0; i < length; i += 2)
149159
{
150-
try
160+
int value = 16 * Hex.getHexValue(hex.charAt(i)) + Hex.getHexValue(hex.charAt(i + 1));
161+
if (value >= 0)
162+
{
163+
bytes.write(value);
164+
}
165+
else if (FORCE_PARSING)
151166
{
152-
bytes.write(Integer.parseInt(hexBuffer.substring(i, i + 2), 16));
167+
LOG.warn("Encountered a malformed hex string");
168+
bytes.write('?'); // todo: what does Acrobat do? Any example PDFs?
153169
}
154-
catch (NumberFormatException e)
170+
else
155171
{
156-
if (FORCE_PARSING)
157-
{
158-
LOG.warn("Encountered a malformed hex string");
159-
bytes.write('?'); // todo: what does Acrobat do? Any example PDFs?
160-
}
161-
else
162-
{
163-
throw new IOException("Invalid hex string: " + hex, e);
164-
}
172+
throw new IOException("Invalid hex string: " + hex);
173+
}
174+
}
175+
if (isLengthUneven)
176+
{
177+
int value = 16 * Hex.getHexValue(hex.charAt(length));
178+
if (value >= 0)
179+
{
180+
bytes.write(value);
181+
}
182+
else if (FORCE_PARSING)
183+
{
184+
LOG.warn("Encountered a malformed hex string");
185+
bytes.write('?'); // todo: what does Acrobat do? Any example PDFs?
186+
}
187+
else
188+
{
189+
throw new IOException("Invalid hex string: " + hex);
165190
}
166191
}
167-
168192
return new COSString(bytes.toByteArray());
169193
}
170194

pdfbox/src/main/java/org/apache/pdfbox/util/Hex.java

Lines changed: 33 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -228,19 +228,46 @@ public static byte[] decodeHex(String s)
228228
}
229229
else
230230
{
231-
String hexByte = s.substring(i, i + 2);
232-
try
231+
int value = 16 * getHexValue(s.charAt(i)) + getHexValue(s.charAt(i + 1));
232+
if (value >= 0)
233233
{
234-
baos.write(Integer.parseInt(hexByte, 16)); // Byte.parseByte won't work with "9C"
234+
baos.write(value);
235235
}
236-
catch (NumberFormatException ex)
236+
else
237237
{
238-
LOG.error(() -> "Can't parse " + hexByte + ", aborting decode", ex);
239-
break;
238+
String hexByte = s.substring(i, i + 2);
239+
LOG.error("Can't parse " + hexByte + ", aborting decode");
240240
}
241241
i += 2;
242242
}
243243
}
244244
return baos.toByteArray();
245245
}
246+
247+
/**
248+
* Converts a given character to its corresponding hexadecimal value. Valid characters are '0'-'9', 'A'-'F', or
249+
* 'a'-'f'. Returns -256 for invalid characters.
250+
* <p>
251+
* The value of -256 is chosen so that to hex digits can be combined before checking for an invalid hex string
252+
*
253+
* @param c the character to be converted to a hexadecimal value
254+
* @return the hexadecimal value of the character, or -256 if the character is invalid
255+
*/
256+
public static int getHexValue(char c)
257+
{
258+
if (c >= '0' && c <= '9')
259+
{
260+
return c - '0';
261+
}
262+
else if (c >= 'A' && c <= 'F')
263+
{
264+
return c - 'A' + 10;
265+
}
266+
else if (c >= 'a' && c <= 'f')
267+
{
268+
return c - 'a' + 10;
269+
}
270+
return -256;
271+
}
272+
246273
}

0 commit comments

Comments
 (0)