apache · khemkasudeep · Aug 3, 2015 · Sep 30, 2015 · britter · Sep 29, 2015
diff --git a/src/main/java/org/apache/commons/imaging/formats/jpeg/iptc/IptcParser.java b/src/main/java/org/apache/commons/imaging/formats/jpeg/iptc/IptcParser.java
@@ -22,6 +22,8 @@
 import java.io.IOException;
 import java.io.InputStream;
 import java.nio.ByteOrder;
+import java.nio.charset.Charset;
+import java.nio.charset.IllegalCharsetNameException;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.Comparator;
@@ -125,6 +127,9 @@ public PhotoshopApp13Data parsePhotoshopSegment(final byte[] bytes,
     protected List<IptcRecord> parseIPTCBlock(final byte[] bytes, final boolean verbose)
             throws IOException {
         final List<IptcRecord> elements = new ArrayList<IptcRecord>();
+        final String DEFAULT_ENCODING = "ISO-8859-1";
+        final int ENV_TAG_CODED_CHARACTER_SET = 90;
+        String characterName = DEFAULT_ENCODING;
 
         int index = 0;
         // Integer recordVersion = null;
@@ -190,6 +195,11 @@ protected List<IptcRecord> parseIPTCBlock(final byte[] bytes, final boolean verb
             // Debug.debug("recordSize", recordSize + " (0x"
             // + Integer.toHexString(recordSize) + ")");
 
+            if(recordNumber == IptcConstants.IPTC_ENVELOPE_RECORD_NUMBER && recordType == ENV_TAG_CODED_CHARACTER_SET){
+                characterName = getEncodingCharsetName(recordData);
+                continue;
+            }
+
             if (recordNumber != IptcConstants.IPTC_APPLICATION_2_RECORD_NUMBER) {
                 continue;
             }
@@ -226,7 +236,7 @@ protected List<IptcRecord> parseIPTCBlock(final byte[] bytes, final boolean verb
             // continue;
             // }
 
-            final String value = new String(recordData, "ISO-8859-1");
+            final String value = new String(recordData, characterName);
 
             final IptcType iptcType = IptcTypeLookup.getIptcType(recordType);
 
@@ -248,6 +258,43 @@ protected List<IptcRecord> parseIPTCBlock(final byte[] bytes, final boolean verb
         return elements;
     }
 
+    /**
+     * Resolves the value of the coded-character-set record to a charset name.
+     * The raw bytes are tried as a charset name first, then (ignoring spaces)
+     * compared with the known escape sequences; "ISO-8859-1" is the fallback.
+     *
+     * @param codedCharacterSet raw data of the coded-character-set record
+     * @return the charset name to decode subsequent records with
+     * @throws IOException declared only because of the by-name decoding below
+     */
+    private String getEncodingCharsetName(final byte[] codedCharacterSet) throws IOException {
+        // Decode with a fixed single-byte charset rather than the platform default,
+        // so the lookup gives the same answer on every machine.
+        final String candidateName = new String(codedCharacterSet, "ISO-8859-1");
+        try {
+            if (Charset.isSupported(candidateName)) {
+                return candidateName;
+            }
+        } catch (final IllegalCharsetNameException ignored) {
+            // not a legal charset name (e.g. an escape sequence); try the table below
+        }
+        for (final CharsetEscapeSequence escapeSeq : CharsetEscapeSequence.getSupportedEscapeSeqList()) {
+            final byte[] expected = escapeSeq.escapeSequence;
+            int matched = 0;
+            boolean match = true;
+            for (final byte b : codedCharacterSet) {
+                if (b != ' ' && (matched >= expected.length || b != expected[matched++])) {
+                    match = false; // extra or differing non-space byte
+                    break;
+                }
+            }
+            if (match && matched == expected.length) {
+                return escapeSeq.charsetName;
+            }
+        }
+        return "ISO-8859-1";
+    }
+
     protected List<IptcBlock> parseAllBlocks(final byte[] bytes, final boolean verbose,
             final boolean strict) throws ImageReadException, IOException {
         final List<IptcBlock> blocks = new ArrayList<IptcBlock>();
@@ -457,4 +504,20 @@ public int compare(final IptcRecord e1, final IptcRecord e2) {
         return blockData;
     }
 
+    /** Pairs a coded-character-set escape sequence with the charset name it maps to. */
+    private static class CharsetEscapeSequence{
+        final byte[] escapeSequence;
+        final String charsetName;
+        CharsetEscapeSequence(byte[] escapeSequence, String charsetName){
+            this.escapeSequence = escapeSequence;
+            this.charsetName = charsetName;
+        }
+
+        /** Lists the recognised sequences; currently only ESC % G, mapped to "utf8". */
+        static CharsetEscapeSequence[] getSupportedEscapeSeqList(){
+            final byte[] utf8Escape = {'\u001B','%','G'};
+            return new CharsetEscapeSequence[]{ new CharsetEscapeSequence(utf8Escape, "utf8") };
+        }
+    }
+
 }
diff --git a/src/main/java/org/apache/commons/imaging/formats/jpeg/iptc/IptcRecord.java b/src/main/java/org/apache/commons/imaging/formats/jpeg/iptc/IptcRecord.java
@@ -38,18 +38,22 @@ public IptcRecord(final IptcType iptcType, final byte[] bytes, final String valu
         this.value = value;
     }
 
-    public IptcRecord(final IptcType iptcType, final String value) {
+    public IptcRecord(final IptcType iptcType, final String value, final String charsetName) {
         this.iptcType = iptcType;
         byte[] tempBytes;
         try {
-            tempBytes = value.getBytes("ISO-8859-1");
-        } catch (final UnsupportedEncodingException cannotHappen) {
-            tempBytes = null;
+            tempBytes = value.getBytes(charsetName);
+        } catch (final UnsupportedEncodingException e) { // possible now that the charset is caller-supplied
+            throw new IllegalArgumentException("Unsupported charset: " + charsetName, e);
         }
         this.bytes = tempBytes;
         this.value = value;
     }
 
+    public IptcRecord(final IptcType iptcType, final String value) {
+        this(iptcType, value, "ISO-8859-1"); // delegates with ISO-8859-1 as the charset name
+    }
+
     public byte[] getRawBytes() {
         return bytes.clone();
     }

diff --git a/src/test/data/images/iptc/2/test.jpeg b/src/test/data/images/iptc/2/test.jpeg
diff --git a/src/test/java/org/apache/commons/imaging/formats/jpeg/iptc/IptcCodedCharacterSetTest.java b/src/test/java/org/apache/commons/imaging/formats/jpeg/iptc/IptcCodedCharacterSetTest.java
@@ -0,0 +1,53 @@
+package org.apache.commons.imaging.formats.jpeg.iptc;
+
+import org.apache.commons.imaging.ImagingTestConstants;
+import org.apache.commons.imaging.common.ImageMetadata;
+import org.apache.commons.imaging.common.bytesource.ByteSource;
+import org.apache.commons.imaging.common.bytesource.ByteSourceFile;
+import org.apache.commons.imaging.formats.jpeg.JpegImageParser;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+import java.io.File;
+import java.nio.charset.Charset;
+import java.util.Collection;
+import java.util.Collections;
+
+import static org.junit.Assert.fail;
+
+
+@RunWith(Parameterized.class)
+public class IptcCodedCharacterSetTest extends IptcBaseTest {
+    private final File imageFile;
+
+    @Parameterized.Parameters
+    public static Collection<File> data() throws Exception {
+        return Collections.singleton(new File(ImagingTestConstants.TEST_IMAGE_FOLDER, "iptc/2/test.jpeg"));
+    }
+
+    public IptcCodedCharacterSetTest(File imageFile) {
+        this.imageFile = imageFile;
+    }
+
+    /** Fails when the caption item is missing or differs from the expected UTF-8 text. */
+    @Test
+    public void testCodedCharacterSet() throws Exception {
+        final byte[] expectedBytes = {-28,-68,-102,-26,-124,-113,-27,-83,-105};
+        final String requiredCaption = new String(expectedBytes, "utf8");
+        final ImageMetadata metadata = new JpegImageParser().getMetadata(new ByteSourceFile(imageFile), null);
+        boolean captionFound = false; // without this the test passes when nothing was compared
+        for (ImageMetadata.ImageMetadataItem item : metadata.getItems()) {
+            final String[] keyValue = item.toString().split(":", 2);
+            if (keyValue.length > 1 && keyValue[0].equalsIgnoreCase("Caption/Abstract")) {
+                captionFound = true;
+                if (!keyValue[1].trim().equals(requiredCaption)) {
+                    fail("metadata extraction failed");
+                }
+            }
+        }
+        if (!captionFound) {
+            fail("caption item not found in " + imageFile);
+        }
+    }
+}