Skip to content

Commit c3f409f

Browse files
committed
PDFBOX-6123: support Seq / Bag mixup in lenient mode + test
git-svn-id: https://svn.apache.org/repos/asf/pdfbox/trunk@1930584 13f79535-47bb-0310-9956-ffa450edef68
1 parent 6d41570 commit c3f409f

File tree

2 files changed

+39
-1
lines changed

2 files changed

+39
-1
lines changed

xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -523,7 +523,7 @@ private void manageArray(XMPMetadata xmp, Element property, PropertyType type, C
523523
+ whatFound
524524
+ " [prefix=" + prefix + "; name=" + name + "]");
525525
}
526-
if (!bagOrSeq.getLocalName().equals(type.card().name()))
526+
if (strictParsing && !bagOrSeq.getLocalName().equals(type.card().name()))
527527
{
528528
// not the expected array type
529529
throw new XmpParsingException(ErrorType.Format, "Invalid array type, expecting " + type.card()

xmpbox/src/test/java/org/apache/xmpbox/xml/DomXmpParserTest.java

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
import java.util.List;
2929

3030
import org.apache.xmpbox.XMPMetadata;
31+
import org.apache.xmpbox.schema.DublinCoreSchema;
3132
import org.apache.xmpbox.schema.PDFAIdentificationSchema;
3233
import org.apache.xmpbox.schema.PhotoshopSchema;
3334
import org.apache.xmpbox.schema.XMPMediaManagementSchema;
@@ -517,4 +518,41 @@ void testPDFBox5292() throws XmpParsingException, BadFieldValueException
517518
String dataValue = xmp.getSchema("http://ns.example.org/default/1.0/").getUnqualifiedTextPropertyValue("Data");
518519
assertEquals("Example", dataValue);
519520
}
521+
522+
/**
523+
* Test that a Seq / Bag mixup gets detected in strict mode and gets read in lenient mode.
524+
* @throws XmpParsingException if parsing in lenient mode unexpectedly fails
525+
*/
526+
@Test
527+
void testLenientBagSeqMixup() throws XmpParsingException
528+
{
529+
String s = "<?xpacket begin='' id='W5M0MpCehiHzreSzNTczkc9d'?>\n" +
530+
"<?adobe-xap-filters esc=\"CRLF\"?>\n" +
531+
"<x:xmpmeta xmlns:x='adobe:ns:meta/'>\n" +
532+
" <rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'>\n" +
533+
" <rdf:Description xmlns:dc='http://purl.org/dc/elements/1.1/'\n" +
534+
" dc:format='application/pdf'>\n" +
535+
" <dc:subject>\n" +
536+
" <rdf:Seq>\n" +
537+
" <rdf:li>Important subject</rdf:li>\n" +
538+
" <rdf:li>Unimportant subject</rdf:li>\n" +
539+
" </rdf:Seq>\n" +
540+
" </dc:subject>\n" +
541+
" </rdf:Description>\n" +
542+
" </rdf:RDF>\n" +
543+
"</x:xmpmeta>\n" +
544+
"<?xpacket end='w'?>";
545+
XmpParsingException ex = assertThrows(
546+
XmpParsingException.class,
547+
() -> new DomXmpParser().parse(s.getBytes(StandardCharsets.UTF_8)));
548+
assertEquals("Invalid array type, expecting Bag and found Seq [prefix=dc; name=subject]", ex.getMessage());
549+
DomXmpParser xmpParser = new DomXmpParser();
550+
xmpParser.setStrictParsing(false);
551+
XMPMetadata xmp = xmpParser.parse(s.getBytes(StandardCharsets.UTF_8));
552+
DublinCoreSchema dublinCoreSchema = xmp.getDublinCoreSchema();
553+
List<String> subjects = dublinCoreSchema.getSubjects();
554+
assertEquals(2, subjects.size());
555+
assertEquals("Important subject", subjects.get(0));
556+
assertEquals("Unimportant subject", subjects.get(1));
557+
}
520558
}

0 commit comments

Comments
 (0)