Skip to content

Commit 08c6c7a

Browse files
committed
PDFBOX-6125: LangAlt also not simple; skip empty attributes; add more tests
git-svn-id: https://svn.apache.org/repos/asf/pdfbox/trunk@1930679 13f79535-47bb-0310-9956-ffa450edef68
1 parent 8a40650 commit 08c6c7a

File tree

2 files changed

+88
-4
lines changed

2 files changed

+88
-4
lines changed

xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -366,7 +366,7 @@ private void parseDescriptionRootAttr(XMPMetadata xmp, Element description, Attr
366366
type = TypeMapping.createPropertyType(Types.Text, Cardinality.Simple);
367367
}
368368
}
369-
else if (!type.type().isSimple() || type.card().isArray())
369+
else if (!type.type().isSimple() || type.card().isArray() || type.type() == Types.LangAlt)
370370
{
371371
if (strictParsing)
372372
{
@@ -376,7 +376,12 @@ else if (!type.type().isSimple() || type.card().isArray())
376376
}
377377
else
378378
{
379-
// PDFBOX-6125: Default to text
379+
// PDFBOX-6125: Default to text or skip
380+
if (attr.getValue() == null || attr.getValue().isEmpty())
381+
{
382+
schema.removeAttribute(attr.getLocalName());
383+
return;
384+
}
380385
type = TypeMapping.createPropertyType(Types.Text, Cardinality.Simple);
381386
}
382387
}
@@ -1159,7 +1164,7 @@ else if (XmpConstants.DEFAULT_RDF_PREFIX.equals(attr.getPrefix()))
11591164
type = TypeMapping.createPropertyType(Types.Text, Cardinality.Simple);
11601165
}
11611166
}
1162-
else if (!type.type().isSimple() || type.card().isArray())
1167+
else if (!type.type().isSimple() || type.card().isArray() || type.type() == Types.LangAlt)
11631168
{
11641169
if (strictParsing)
11651170
{
@@ -1169,7 +1174,11 @@ else if (!type.type().isSimple() || type.card().isArray())
11691174
}
11701175
else
11711176
{
1172-
// PDFBOX-6125: Default to text
1177+
// PDFBOX-6125: Default to text or skip
1178+
if (attr.getValue() == null || attr.getValue().isEmpty())
1179+
{
1180+
continue;
1181+
}
11731182
type = TypeMapping.createPropertyType(Types.Text, Cardinality.Simple);
11741183
}
11751184
}

xmpbox/src/test/java/org/apache/xmpbox/xml/DomXmpParserTest.java

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@
5151

5252
import static org.junit.jupiter.api.Assertions.assertEquals;
5353
import static org.junit.jupiter.api.Assertions.assertNotNull;
54+
import static org.junit.jupiter.api.Assertions.assertNull;
5455
import static org.junit.jupiter.api.Assertions.assertThrows;
5556
import org.junit.jupiter.api.Test;
5657

@@ -818,6 +819,80 @@ void testBadAttr3() throws XmpParsingException, TransformerException
818819
assertEquals("[creator=TextType:Creator]", dublinCoreSchema.getProperty(DublinCoreSchema.CREATOR).toString());
819820
}
820821

822+
/**
823+
* Test empty attribute where an array is expected. The attribute is skipped in lenient mode.
824+
*
825+
* @throws XmpParsingException
826+
* @throws TransformerException
827+
* @throws BadFieldValueException
828+
*/
829+
@Test
830+
void testBadAttr4() throws XmpParsingException, TransformerException, BadFieldValueException
831+
{
832+
String s = "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n" +
833+
"<?xpacket begin='' id='W5M0MpCehiHzreSzNTczkc9d' bytes='1206'?><rdf:RDF xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\" >\n" +
834+
" <rdf:Description xmlns=\"http://purl.org/dc/elements/1.1/\" xmlns:dc=\"http://purl.org/dc/elements/1.1/\" about=\"\" dc:creator=\"\">\n" +
835+
" <dc:coverage>Coverage</dc:coverage>\n" +
836+
" </rdf:Description>\n" +
837+
"</rdf:RDF><?xpacket end='r'?>";
838+
final DomXmpParser xmpParser1 = new DomXmpParser();
839+
XmpParsingException ex = assertThrows(XmpParsingException.class,
840+
() -> xmpParser1.parse(s.getBytes(StandardCharsets.UTF_8)));
841+
assertEquals("The type 'Text' in 'dc:creator=' is a structured or array type, but attributes are simple types", ex.getMessage());
842+
DomXmpParser xmpParser2 = new DomXmpParser();
843+
xmpParser2.setStrictParsing(false);
844+
XMPMetadata xmp2 = xmpParser2.parse(s.getBytes(StandardCharsets.UTF_8));
845+
DublinCoreSchema dublinCoreSchema2 = xmp2.getDublinCoreSchema();
846+
assertEquals("Coverage", dublinCoreSchema2.getCoverage());
847+
assertNull(dublinCoreSchema2.getProperty(DublinCoreSchema.CREATOR));
848+
XmpSerializer serializer = new XmpSerializer();
849+
ByteArrayOutputStream baos = new ByteArrayOutputStream();
850+
serializer.serialize(xmp2, baos, true);
851+
DomXmpParser xmpParser3 = new DomXmpParser();
852+
xmpParser3.setStrictParsing(false);
853+
XMPMetadata xmp3 = xmpParser3.parse(baos.toByteArray());
854+
DublinCoreSchema dublinCoreSchema3 = xmp3.getDublinCoreSchema();
855+
assertEquals("Coverage", dublinCoreSchema3.getCoverage());
856+
assertNull(dublinCoreSchema2.getProperty(DublinCoreSchema.CREATOR));
857+
}
858+
859+
/**
860+
* Test empty attribute where an LangAlt is expected. The attribute is skipped in lenient mode.
861+
*
862+
* @throws XmpParsingException
863+
* @throws TransformerException
864+
* @throws BadFieldValueException
865+
*/
866+
@Test
867+
void testBadAttr5() throws XmpParsingException, TransformerException, BadFieldValueException
868+
{
869+
String s = "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n" +
870+
"<?xpacket begin='' id='W5M0MpCehiHzreSzNTczkc9d' bytes='987'?><rdf:RDF xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\" xmlns:iX=\"http://ns.adobe.com/iX/1.0/\">\n" +
871+
" <rdf:Description xmlns=\"http://purl.org/dc/elements/1.1/\" xmlns:dc=\"http://purl.org/dc/elements/1.1/\" about=\"\" dc:title=\"\" dc:coverage=\"COVER\"/>\n" +
872+
"</rdf:RDF><?xpacket end='r'?>";
873+
final DomXmpParser xmpParser1 = new DomXmpParser();
874+
XmpParsingException ex = assertThrows(XmpParsingException.class,
875+
() -> xmpParser1.parse(s.getBytes(StandardCharsets.UTF_8)));
876+
assertEquals("The type 'LangAlt' in 'dc:title=' is a structured or array type, but attributes are simple types", ex.getMessage());
877+
DomXmpParser xmpParser2 = new DomXmpParser();
878+
xmpParser2.setStrictParsing(false);
879+
XMPMetadata xmp2 = xmpParser2.parse(s.getBytes(StandardCharsets.UTF_8));
880+
DublinCoreSchema dublinCoreSchema2 = xmp2.getDublinCoreSchema();
881+
assertNull(dublinCoreSchema2.getTitle());
882+
assertNull(dublinCoreSchema2.getProperty(DublinCoreSchema.TITLE));
883+
assertEquals("COVER", dublinCoreSchema2.getCoverage());
884+
XmpSerializer serializer = new XmpSerializer();
885+
ByteArrayOutputStream baos = new ByteArrayOutputStream();
886+
serializer.serialize(xmp2, baos, true);
887+
DomXmpParser xmpParser3 = new DomXmpParser();
888+
xmpParser3.setStrictParsing(false);
889+
XMPMetadata xmp3 = xmpParser3.parse(baos.toByteArray());
890+
DublinCoreSchema dublinCoreSchema3 = xmp3.getDublinCoreSchema();
891+
assertNull(dublinCoreSchema3.getTitle());
892+
assertNull(dublinCoreSchema3.getProperty(DublinCoreSchema.TITLE));
893+
assertEquals("COVER", dublinCoreSchema3.getCoverage());
894+
}
895+
821896
@Test
822897
void testBadSchema() throws XmpParsingException
823898
{

0 commit comments

Comments
 (0)