Skip to content

Commit 8a40650

Browse files
committed
PDFBOX-6127: look for non standard namespaces in rdf:RDF
git-svn-id: https://svn.apache.org/repos/asf/pdfbox/trunk@1930669 13f79535-47bb-0310-9956-ffa450edef68
1 parent 82423fa commit 8a40650

File tree

2 files changed

+70
-7
lines changed

2 files changed

+70
-7
lines changed

xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java

Lines changed: 37 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,24 @@ public XMPMetadata parse(InputStream input) throws XmpParsingException
192192
// Now, parse the content of root
193193
Element rdfRdf = findDescriptionsParent(root);
194194
nsFinder.push(rdfRdf); // PDFBOX-6099: push namespaces in rdf:RDF
195+
196+
// PDFBOX-6127: look for non standard namespaces (similar to PDFBOX-2378)
197+
if (!strictParsing)
198+
{
199+
NamedNodeMap nnm = rdfRdf.getAttributes();
200+
if (nnm != null)
201+
{
202+
for (int i = 0; i < nnm.getLength(); i++)
203+
{
204+
Attr attr = (Attr) nnm.item(i);
205+
if (XMLConstants.XMLNS_ATTRIBUTE.equals(attr.getPrefix()))
206+
{
207+
maybeAddNonStandardNamespace(xmp, attr);
208+
}
209+
}
210+
}
211+
}
212+
195213
List<Element> descriptions = DomHelper.getElementChildren(rdfRdf);
196214
for (Element description : descriptions)
197215
{
@@ -212,6 +230,23 @@ public XMPMetadata parse(InputStream input) throws XmpParsingException
212230
return xmp;
213231
}
214232

233+
private void maybeAddNonStandardNamespace(XMPMetadata xmp, Attr attr)
234+
{
235+
// xmlns:prefix="namespace"
236+
TypeMapping tm = xmp.getTypeMapping();
237+
String namespace = attr.getValue();
238+
if (!XmpConstants.RDF_NAMESPACE.equals(namespace) &&
239+
!tm.isStructuredTypeNamespace(namespace) &&
240+
xmp.getSchema(namespace) == null && tm.getSchemaFactory(namespace) == null)
241+
{
242+
// PDFBOX-5128 / PDFBOX-6127: Add the schema on the fly if it can't be found
243+
// PDFBOX-5649: But only if the namespace isn't already known
244+
// because this adds a namespace without property descriptions
245+
// PDFBOX-6127: never rdf
246+
tm.addNewNameSpace(namespace, attr.getLocalName());
247+
}
248+
}
249+
215250
private boolean isSchemaExtensionProperty(final Element element)
216251
{
217252
return element != null && "pdfaExtension".equals(element.getPrefix());
@@ -279,14 +314,9 @@ else if (attr.getPrefix() == null && XmpConstants.ABOUT_NAME.equals(attr.getLoca
279314
}
280315
else if (XMLConstants.XMLNS_ATTRIBUTE.equals(attr.getPrefix()))
281316
{
282-
String namespace = attr.getValue();
283-
if (!strictParsing && !tm.isStructuredTypeNamespace(namespace) &&
284-
xmp.getSchema(namespace) == null && tm.getSchemaFactory(namespace) == null)
317+
if (!strictParsing)
285318
{
286-
// PDFBOX-5128: Add the schema on the fly if it can't be found
287-
// PDFBOX-5649: But only if the namespace isn't already known
288-
// because this adds a namespace without property descriptions
289-
tm.addNewNameSpace(namespace, attr.getLocalName());
319+
maybeAddNonStandardNamespace(xmp, attr);
290320
}
291321
}
292322
else

xmpbox/src/test/java/org/apache/xmpbox/xml/DomXmpParserTest.java

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -914,4 +914,37 @@ void testPDFBOX6126() throws XmpParsingException, BadFieldValueException, Transf
914914
XMPSchema uaSchema2 = xmp2.getSchema("http://www.aiim.org/pdfua/ns/id/");
915915
assertEquals(1, uaSchema2.getIntegerPropertyValueAsSimple("part"));
916916
}
917+
918+
@Test
919+
void testNonStandardURIinRDF() throws XmpParsingException, TransformerException
920+
{
921+
String s = "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n" +
922+
"<?xpacket begin=\"\" id=\"W5M0MpCehiHzreSzNTczkc9d\"?><x:xmpmeta xmlns:x=\"adobe:ns:meta/\" x:xmptk=\"Adobe XMP Core 4.2.1-c041 52.342996, 2008/05/07-20:48:00 \">\n" +
923+
" <rdf:RDF xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n" +
924+
" <rdf:Description xmlns:pdfx=\"http://ns.adobe.com/pdfx/1.3/\" rdf:about=\"\">\n" +
925+
" <pdfx:XPressPrivate>private</pdfx:XPressPrivate>\n" +
926+
" </rdf:Description>\n" +
927+
" </rdf:RDF>\n" +
928+
"</x:xmpmeta><?xpacket end=\"w\"?>";
929+
final DomXmpParser xmpParser1 = new DomXmpParser();
930+
XmpParsingException ex = assertThrows(XmpParsingException.class,
931+
() -> xmpParser1.parse(s.getBytes(StandardCharsets.UTF_8)));
932+
assertEquals("Cannot find a definition for the namespace http://ns.adobe.com/pdfx/1.3/, property: pdfx:XPressPrivate", ex.getMessage());
933+
DomXmpParser xmpParser2 = new DomXmpParser();
934+
xmpParser2.setStrictParsing(false);
935+
XMPMetadata xmp2 = xmpParser2.parse(s.getBytes(StandardCharsets.UTF_8));
936+
XMPSchema schema2 = xmp2.getSchema("http://ns.adobe.com/pdfx/1.3/");
937+
assertEquals("[XPressPrivate=TextType:private]", schema2.getProperty("XPressPrivate").toString());
938+
XmpSerializer serializer = new XmpSerializer();
939+
ByteArrayOutputStream baos = new ByteArrayOutputStream();
940+
serializer.serialize(xmp2, baos, true);
941+
final DomXmpParser xmpParser3 = new DomXmpParser();
942+
ex = assertThrows(XmpParsingException.class, () -> xmpParser3.parse(baos.toByteArray()));
943+
assertEquals("Cannot find a definition for the namespace http://ns.adobe.com/pdfx/1.3/, property: pdfx:XPressPrivate", ex.getMessage());
944+
DomXmpParser xmpParser4 = new DomXmpParser();
945+
xmpParser4.setStrictParsing(false);
946+
XMPMetadata xmp4 = xmpParser4.parse(baos.toByteArray());
947+
XMPSchema schema4 = xmp4.getSchema("http://ns.adobe.com/pdfx/1.3/");
948+
assertEquals("[XPressPrivate=TextType:private]", schema4.getProperty("XPressPrivate").toString());
949+
}
917950
}

0 commit comments

Comments
 (0)