-
Notifications
You must be signed in to change notification settings - Fork 198
IMAGING-168 installing package with Swedish characters adds junk char… #18
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: trunk
Are you sure you want to change the base?
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -22,6 +22,8 @@ | |
| import java.io.IOException; | ||
| import java.io.InputStream; | ||
| import java.nio.ByteOrder; | ||
| import java.nio.charset.Charset; | ||
| import java.nio.charset.IllegalCharsetNameException; | ||
| import java.util.ArrayList; | ||
| import java.util.Collections; | ||
| import java.util.Comparator; | ||
|
|
@@ -125,6 +127,9 @@ public PhotoshopApp13Data parsePhotoshopSegment(final byte[] bytes, | |
| protected List<IptcRecord> parseIPTCBlock(final byte[] bytes, final boolean verbose) | ||
| throws IOException { | ||
| final List<IptcRecord> elements = new ArrayList<IptcRecord>(); | ||
| final String DEFAULT_ENCODING = "ISO-8859-1"; | ||
| final int ENV_TAG_CODED_CHARACTER_SET = 90; | ||
| String characterName = DEFAULT_ENCODING; | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why |
||
|
|
||
| int index = 0; | ||
| // Integer recordVersion = null; | ||
|
|
@@ -190,6 +195,11 @@ protected List<IptcRecord> parseIPTCBlock(final byte[] bytes, final boolean verb | |
| // Debug.debug("recordSize", recordSize + " (0x" | ||
| // + Integer.toHexString(recordSize) + ")"); | ||
|
|
||
| if(recordNumber == IptcConstants.IPTC_ENVELOPE_RECORD_NUMBER && recordType == ENV_TAG_CODED_CHARACTER_SET){ | ||
| characterName = getEncodingCharsetName(recordData); | ||
| continue; | ||
| } | ||
|
|
||
| if (recordNumber != IptcConstants.IPTC_APPLICATION_2_RECORD_NUMBER) { | ||
| continue; | ||
| } | ||
|
|
@@ -226,7 +236,7 @@ protected List<IptcRecord> parseIPTCBlock(final byte[] bytes, final boolean verb | |
| // continue; | ||
| // } | ||
|
|
||
| final String value = new String(recordData, "ISO-8859-1"); | ||
| final String value = new String(recordData, characterName); | ||
|
|
||
| final IptcType iptcType = IptcTypeLookup.getIptcType(recordType); | ||
|
|
||
|
|
@@ -248,6 +258,43 @@ protected List<IptcRecord> parseIPTCBlock(final byte[] bytes, final boolean verb | |
| return elements; | ||
| } | ||
|
|
||
| private String getEncodingCharsetName(byte[] codedCharacterSet){ | ||
| String codedCharacterSetString = new String(codedCharacterSet); | ||
| //byte[][] = getListOfEncoding | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Remove commented out code |
||
| try { | ||
| if (Charset.isSupported(codedCharacterSetString)) { | ||
| return codedCharacterSetString; | ||
| } | ||
| }catch (IllegalCharsetNameException e){ | ||
|
|
||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Might be worth to add at least one comment saying why these exceptions are not important, and that can be safely ignored. |
||
| }catch (IllegalArgumentException e){ | ||
|
|
||
| } | ||
| //check if encoding is a escape sequence | ||
| //normalize encoding byte sequence | ||
| byte[] codedCharacterSetNormalized = new byte[codedCharacterSet.length]; | ||
| int j=0; | ||
| for(int i=0; i< codedCharacterSet.length; i++){ | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can be implemented using enhanced for-loop syntax |
||
| if(codedCharacterSet[i] != ' ') { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. How about: WHITESPACE.equals(codedCharacterSet[i])instead? (assuming you have defined a constant |
||
| codedCharacterSetNormalized[j++] = codedCharacterSet[i]; | ||
| } | ||
| } | ||
| for(CharsetEscapeSequence escapeSeq : CharsetEscapeSequence.getSupportedEscapeSeqList()){ | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Looks fairly complicated given the fact that we only have one supported
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @britter removing this logic for now. We would add this later if we need to add multiple escape sequences. |
||
| if(j != escapeSeq.escapeSequence.length) continue; | ||
| boolean match = true; | ||
| for(int i=0; i < j; i++ ){ | ||
| if(codedCharacterSetNormalized[i] != escapeSeq.escapeSequence[i]){ | ||
| match = false; | ||
| break; | ||
| } | ||
| } | ||
| if(match){ | ||
| return escapeSeq.charsetName; | ||
| } | ||
| } | ||
| return "ISO-8859-1"; | ||
| } | ||
|
|
||
| protected List<IptcBlock> parseAllBlocks(final byte[] bytes, final boolean verbose, | ||
| final boolean strict) throws ImageReadException, IOException { | ||
| final List<IptcBlock> blocks = new ArrayList<IptcBlock>(); | ||
|
|
@@ -457,4 +504,20 @@ public int compare(final IptcRecord e1, final IptcRecord e2) { | |
| return blockData; | ||
| } | ||
|
|
||
| private static class CharsetEscapeSequence{ | ||
| byte[] escapeSequence; | ||
| String charsetName; | ||
|
|
||
| CharsetEscapeSequence(byte[] escapeSequence, String charsetName){ | ||
| this.escapeSequence = escapeSequence; | ||
| this.charsetName = charsetName; | ||
| } | ||
|
|
||
| static CharsetEscapeSequence[] getSupportedEscapeSeqList(){ | ||
| return new CharsetEscapeSequence[]{ | ||
| new CharsetEscapeSequence(new byte[]{'\u001B','%','G'}, "utf8") | ||
| }; | ||
| } | ||
| } | ||
|
|
||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -38,18 +38,22 @@ public IptcRecord(final IptcType iptcType, final byte[] bytes, final String valu | |
| this.value = value; | ||
| } | ||
|
|
||
| public IptcRecord(final IptcType iptcType, final String value) { | ||
| public IptcRecord(final IptcType iptcType, final String value, final String charsetName) { | ||
| this.iptcType = iptcType; | ||
| byte[] tempBytes; | ||
| try { | ||
| tempBytes = value.getBytes("ISO-8859-1"); | ||
| tempBytes = value.getBytes(charsetName); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Overall I think the code looks good to be merged. I would need to find some spec/docs for reference to clear up a few more questions about the issue. But the main issue right now are the conflicts. The Then we can resolve the conflicts and leave it ready to be merged 👍 |
||
| } catch (final UnsupportedEncodingException cannotHappen) { | ||
| tempBytes = null; | ||
| } | ||
| this.bytes = tempBytes; | ||
| this.value = value; | ||
| } | ||
|
|
||
| public IptcRecord(final IptcType iptcType, final String value) { | ||
| this(iptcType, value, "ISO-8859-1"); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should better use a constant value.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. +1 there is a constant defined in another class, but I wonder if we could use |
||
| } | ||
|
|
||
| public byte[] getRawBytes() { | ||
| return bytes.clone(); | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,53 @@ | ||
| package org.apache.commons.imaging.formats.jpeg.iptc; | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please add the Apache License Header
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @britter made changes as per suggestions. Please review now. |
||
|
|
||
| import org.apache.commons.imaging.ImagingTestConstants; | ||
| import org.apache.commons.imaging.common.ImageMetadata; | ||
| import org.apache.commons.imaging.common.bytesource.ByteSource; | ||
| import org.apache.commons.imaging.common.bytesource.ByteSourceFile; | ||
| import org.apache.commons.imaging.formats.jpeg.JpegImageParser; | ||
| import org.junit.Test; | ||
| import org.junit.runner.RunWith; | ||
| import org.junit.runners.Parameterized; | ||
|
|
||
| import java.io.File; | ||
| import java.nio.charset.Charset; | ||
| import java.util.Collection; | ||
| import java.util.Collections; | ||
|
|
||
| import static org.junit.Assert.fail; | ||
|
|
||
|
|
||
| @RunWith(Parameterized.class) | ||
| public class IptcCodedCharacterSetTest extends IptcBaseTest { | ||
|
|
||
| private File imageFile; | ||
|
|
||
| @Parameterized.Parameters | ||
| public static Collection<File> data() throws Exception { | ||
| return Collections.singleton(new File(ImagingTestConstants.TEST_IMAGE_FOLDER, "iptc/2/test.jpeg")); | ||
| } | ||
|
|
||
| public IptcCodedCharacterSetTest(File imageFile) { | ||
| this.imageFile = imageFile; | ||
| } | ||
|
|
||
| @Test | ||
| public void testCodedCharacterSet() throws Exception { | ||
| byte[] bytePatternToCompare = new byte[] | ||
| {-28,-68,-102,-26,-124,-113,-27,-83,-105}; | ||
|
|
||
| String requiredCaption = new String( bytePatternToCompare , "utf8"); | ||
| String metadataName = "Caption/Abstract"; | ||
|
|
||
| final ByteSource byteSource = new ByteSourceFile(imageFile); | ||
| JpegImageParser jpegImageParser = new JpegImageParser(); | ||
| ImageMetadata metadata = jpegImageParser.getMetadata(byteSource, null); | ||
| for (ImageMetadata.ImageMetadataItem item : metadata.getItems()) { | ||
| String metadataVal = item.toString(); | ||
| String[] metadataKeyValuePair = metadataVal.split(":", 2); | ||
| if (metadataKeyValuePair.length > 1 && metadataKeyValuePair[0].equalsIgnoreCase(metadataName) && !metadataKeyValuePair[1].trim().equals(requiredCaption)) { | ||
| fail("metadata extraction failed"); | ||
| } | ||
| } | ||
| } | ||
| } | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Missing new line at EOF |
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should both better be
private static finalconstants on this classThere was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe
ENV_TAG_CODED_CHARACTER_SETcould even be added toIptcConstants.