Skip to content

Commit 2752dd7

Browse files
author
khemka
committed
IMAGING-168 installing package with Swedish characters adds junk characters to dc:title property
1 parent 1f9de79 commit 2752dd7

File tree

4 files changed

+123
-3
lines changed

4 files changed

+123
-3
lines changed

src/main/java/org/apache/commons/imaging/formats/jpeg/iptc/IptcParser.java

Lines changed: 64 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@
2222
import java.io.IOException;
2323
import java.io.InputStream;
2424
import java.nio.ByteOrder;
25+
import java.nio.charset.Charset;
26+
import java.nio.charset.IllegalCharsetNameException;
2527
import java.util.ArrayList;
2628
import java.util.Collections;
2729
import java.util.Comparator;
@@ -125,6 +127,9 @@ public PhotoshopApp13Data parsePhotoshopSegment(final byte[] bytes,
125127
protected List<IptcRecord> parseIPTCBlock(final byte[] bytes, final boolean verbose)
126128
throws IOException {
127129
final List<IptcRecord> elements = new ArrayList<IptcRecord>();
130+
final String DEFAULT_ENCODING = "ISO-8859-1";
131+
final int ENV_TAG_CODED_CHARACTER_SET = 90;
132+
String characterName = DEFAULT_ENCODING;
128133

129134
int index = 0;
130135
// Integer recordVersion = null;
@@ -190,6 +195,11 @@ protected List<IptcRecord> parseIPTCBlock(final byte[] bytes, final boolean verb
190195
// Debug.debug("recordSize", recordSize + " (0x"
191196
// + Integer.toHexString(recordSize) + ")");
192197

198+
if(recordNumber == IptcConstants.IPTC_ENVELOPE_RECORD_NUMBER && recordType == ENV_TAG_CODED_CHARACTER_SET){
199+
characterName = getEncodingCharsetName(recordData);
200+
continue;
201+
}
202+
193203
if (recordNumber != IptcConstants.IPTC_APPLICATION_2_RECORD_NUMBER) {
194204
continue;
195205
}
@@ -226,7 +236,7 @@ protected List<IptcRecord> parseIPTCBlock(final byte[] bytes, final boolean verb
226236
// continue;
227237
// }
228238

229-
final String value = new String(recordData, "ISO-8859-1");
239+
final String value = new String(recordData, characterName);
230240

231241
final IptcType iptcType = IptcTypeLookup.getIptcType(recordType);
232242

@@ -248,6 +258,43 @@ protected List<IptcRecord> parseIPTCBlock(final byte[] bytes, final boolean verb
248258
return elements;
249259
}
250260

261+
private String getEncodingCharsetName(byte[] codedCharacterSet){
262+
String codedCharacterSetString = new String(codedCharacterSet);
263+
//byte[][] = getListOfEncoding
264+
try {
265+
if (Charset.isSupported(codedCharacterSetString)) {
266+
return codedCharacterSetString;
267+
}
268+
}catch (IllegalCharsetNameException e){
269+
270+
}catch (IllegalArgumentException e){
271+
272+
}
273+
//check if encoding is a escape sequence
274+
//normalize encoding byte sequence
275+
byte[] codedCharacterSetNormalized = new byte[codedCharacterSet.length];
276+
int j=0;
277+
for(int i=0; i< codedCharacterSet.length; i++){
278+
if(codedCharacterSet[i] != ' ') {
279+
codedCharacterSetNormalized[j++] = codedCharacterSet[i];
280+
}
281+
}
282+
for(CharsetEscapeSequence escapeSeq : CharsetEscapeSequence.getSupportedEscapeSeqList()){
283+
if(j != escapeSeq.escapeSequence.length) continue;
284+
boolean match = true;
285+
for(int i=0; i < j; i++ ){
286+
if(codedCharacterSetNormalized[i] != escapeSeq.escapeSequence[i]){
287+
match = false;
288+
break;
289+
}
290+
}
291+
if(match){
292+
return escapeSeq.charsetName;
293+
}
294+
}
295+
return "ISO-8859-1";
296+
}
297+
251298
protected List<IptcBlock> parseAllBlocks(final byte[] bytes, final boolean verbose,
252299
final boolean strict) throws ImageReadException, IOException {
253300
final List<IptcBlock> blocks = new ArrayList<IptcBlock>();
@@ -457,4 +504,20 @@ public int compare(final IptcRecord e1, final IptcRecord e2) {
457504
return blockData;
458505
}
459506

507+
private static class CharsetEscapeSequence{
508+
byte[] escapeSequence;
509+
String charsetName;
510+
511+
CharsetEscapeSequence(byte[] escapeSequence, String charsetName){
512+
this.escapeSequence = escapeSequence;
513+
this.charsetName = charsetName;
514+
}
515+
516+
static CharsetEscapeSequence[] getSupportedEscapeSeqList(){
517+
return new CharsetEscapeSequence[]{
518+
new CharsetEscapeSequence(new byte[]{'\u001B','%','G'}, "utf8")
519+
};
520+
}
521+
}
522+
460523
}

src/main/java/org/apache/commons/imaging/formats/jpeg/iptc/IptcRecord.java

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,18 +38,22 @@ public IptcRecord(final IptcType iptcType, final byte[] bytes, final String valu
3838
this.value = value;
3939
}
4040

41-
public IptcRecord(final IptcType iptcType, final String value) {
41+
public IptcRecord(final IptcType iptcType, final String value, final String charsetName) {
4242
this.iptcType = iptcType;
4343
byte[] tempBytes;
4444
try {
45-
tempBytes = value.getBytes("ISO-8859-1");
45+
tempBytes = value.getBytes(charsetName);
4646
} catch (final UnsupportedEncodingException cannotHappen) {
4747
tempBytes = null;
4848
}
4949
this.bytes = tempBytes;
5050
this.value = value;
5151
}
5252

53+
public IptcRecord(final IptcType iptcType, final String value) {
54+
this(iptcType, value, "ISO-8859-1");
55+
}
56+
5357
public byte[] getRawBytes() {
5458
return bytes.clone();
5559
}
3.89 KB
Loading
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
package org.apache.commons.imaging.formats.jpeg.iptc;
2+
3+
import org.apache.commons.imaging.ImagingTestConstants;
4+
import org.apache.commons.imaging.common.ImageMetadata;
5+
import org.apache.commons.imaging.common.bytesource.ByteSource;
6+
import org.apache.commons.imaging.common.bytesource.ByteSourceFile;
7+
import org.apache.commons.imaging.formats.jpeg.JpegImageParser;
8+
import org.junit.Test;
9+
import org.junit.runner.RunWith;
10+
import org.junit.runners.Parameterized;
11+
12+
import java.io.File;
13+
import java.nio.charset.Charset;
14+
import java.util.Collection;
15+
import java.util.Collections;
16+
17+
import static org.junit.Assert.fail;
18+
19+
20+
@RunWith(Parameterized.class)
21+
public class IptcCodedCharacterSetTest extends IptcBaseTest {
22+
23+
private File imageFile;
24+
25+
@Parameterized.Parameters
26+
public static Collection<File> data() throws Exception {
27+
return Collections.singleton(new File(ImagingTestConstants.TEST_IMAGE_FOLDER, "iptc/2/test.jpeg"));
28+
}
29+
30+
public IptcCodedCharacterSetTest(File imageFile) {
31+
this.imageFile = imageFile;
32+
}
33+
34+
@Test
35+
public void testCodedCharacterSet() throws Exception {
36+
byte[] bytePatternToCompare = new byte[]
37+
{-28,-68,-102,-26,-124,-113,-27,-83,-105};
38+
39+
String requiredCaption = new String( bytePatternToCompare , "utf8");
40+
String metadataName = "Caption/Abstract";
41+
42+
final ByteSource byteSource = new ByteSourceFile(imageFile);
43+
JpegImageParser jpegImageParser = new JpegImageParser();
44+
ImageMetadata metadata = jpegImageParser.getMetadata(byteSource, null);
45+
for (ImageMetadata.ImageMetadataItem item : metadata.getItems()) {
46+
String metadataVal = item.toString();
47+
String[] metadataKeyValuePair = metadataVal.split(":", 2);
48+
if (metadataKeyValuePair.length > 1 && metadataKeyValuePair[0].equalsIgnoreCase(metadataName) && !metadataKeyValuePair[1].trim().equals(requiredCaption)) {
49+
fail("metadata extraction failed");
50+
}
51+
}
52+
}
53+
}

0 commit comments

Comments
 (0)