2222import java .io .IOException ;
2323import java .io .InputStream ;
2424import java .nio .ByteOrder ;
25+ import java .nio .charset .Charset ;
26+ import java .nio .charset .IllegalCharsetNameException ;
2527import java .util .ArrayList ;
2628import java .util .Collections ;
2729import java .util .Comparator ;
@@ -125,6 +127,9 @@ public PhotoshopApp13Data parsePhotoshopSegment(final byte[] bytes,
125127 protected List <IptcRecord > parseIPTCBlock (final byte [] bytes , final boolean verbose )
126128 throws IOException {
127129 final List <IptcRecord > elements = new ArrayList <IptcRecord >();
130+ final String DEFAULT_ENCODING = "ISO-8859-1" ;
131+ final int ENV_TAG_CODED_CHARACTER_SET = 90 ;
132+ String characterName = DEFAULT_ENCODING ;
128133
129134 int index = 0 ;
130135 // Integer recordVersion = null;
@@ -190,6 +195,11 @@ protected List<IptcRecord> parseIPTCBlock(final byte[] bytes, final boolean verb
190195 // Debug.debug("recordSize", recordSize + " (0x"
191196 // + Integer.toHexString(recordSize) + ")");
192197
198+ if (recordNumber == IptcConstants .IPTC_ENVELOPE_RECORD_NUMBER && recordType == ENV_TAG_CODED_CHARACTER_SET ){
199+ characterName = getEncodingCharsetName (recordData );
200+ continue ;
201+ }
202+
193203 if (recordNumber != IptcConstants .IPTC_APPLICATION_2_RECORD_NUMBER ) {
194204 continue ;
195205 }
@@ -226,7 +236,7 @@ protected List<IptcRecord> parseIPTCBlock(final byte[] bytes, final boolean verb
226236 // continue;
227237 // }
228238
229- final String value = new String (recordData , "ISO-8859-1" );
239+ final String value = new String (recordData , characterName );
230240
231241 final IptcType iptcType = IptcTypeLookup .getIptcType (recordType );
232242
@@ -248,6 +258,43 @@ protected List<IptcRecord> parseIPTCBlock(final byte[] bytes, final boolean verb
248258 return elements ;
249259 }
250260
261+ private String getEncodingCharsetName (byte [] codedCharacterSet ){
262+ String codedCharacterSetString = new String (codedCharacterSet );
263+ //byte[][] = getListOfEncoding
264+ try {
265+ if (Charset .isSupported (codedCharacterSetString )) {
266+ return codedCharacterSetString ;
267+ }
268+ }catch (IllegalCharsetNameException e ){
269+
270+ }catch (IllegalArgumentException e ){
271+
272+ }
273+ //check if encoding is a escape sequence
274+ //normalize encoding byte sequence
275+ byte [] codedCharacterSetNormalized = new byte [codedCharacterSet .length ];
276+ int j =0 ;
277+ for (int i =0 ; i < codedCharacterSet .length ; i ++){
278+ if (codedCharacterSet [i ] != ' ' ) {
279+ codedCharacterSetNormalized [j ++] = codedCharacterSet [i ];
280+ }
281+ }
282+ for (CharsetEscapeSequence escapeSeq : CharsetEscapeSequence .getSupportedEscapeSeqList ()){
283+ if (j != escapeSeq .escapeSequence .length ) continue ;
284+ boolean match = true ;
285+ for (int i =0 ; i < j ; i ++ ){
286+ if (codedCharacterSetNormalized [i ] != escapeSeq .escapeSequence [i ]){
287+ match = false ;
288+ break ;
289+ }
290+ }
291+ if (match ){
292+ return escapeSeq .charsetName ;
293+ }
294+ }
295+ return "ISO-8859-1" ;
296+ }
297+
251298 protected List <IptcBlock > parseAllBlocks (final byte [] bytes , final boolean verbose ,
252299 final boolean strict ) throws ImageReadException , IOException {
253300 final List <IptcBlock > blocks = new ArrayList <IptcBlock >();
@@ -457,4 +504,20 @@ public int compare(final IptcRecord e1, final IptcRecord e2) {
457504 return blockData ;
458505 }
459506
507+ private static class CharsetEscapeSequence {
508+ byte [] escapeSequence ;
509+ String charsetName ;
510+
511+ CharsetEscapeSequence (byte [] escapeSequence , String charsetName ){
512+ this .escapeSequence = escapeSequence ;
513+ this .charsetName = charsetName ;
514+ }
515+
516+ static CharsetEscapeSequence [] getSupportedEscapeSeqList (){
517+ return new CharsetEscapeSequence []{
518+ new CharsetEscapeSequence (new byte []{'\u001B' ,'%' ,'G' }, "utf8" )
519+ };
520+ }
521+ }
522+
460523}
0 commit comments