Skip to content

Commit dc19e4d

Browse files
committed
PDFBOX-3882: try to parse attributes as properties
git-svn-id: https://svn.apache.org/repos/asf/pdfbox/trunk@1930317 13f79535-47bb-0310-9956-ffa450edef68
1 parent 2e3ff18 commit dc19e4d

File tree

1 file changed

+104
-1
lines changed

1 file changed

+104
-1
lines changed

xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java

Lines changed: 104 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -579,6 +579,19 @@ private AbstractField parseLiElement(XMPMetadata xmp, QName descriptor, Element
579579
throw new XmpParsingException(ErrorType.InvalidType, "Parsing of structured type failed", ex);
580580
}
581581
loadAttributes(af, liElement);
582+
if (af instanceof AbstractStructuredType)
583+
{
584+
PropertiesDescription pm;
585+
if (type.isStructured())
586+
{
587+
pm = tm.getStructuredPropMapping(type);
588+
}
589+
else
590+
{
591+
pm = tm.getDefinedDescriptionByNamespace(liElement.getNamespaceURI());
592+
}
593+
af = tryParseAttributesAsProperties(xmp, liElement, tm, (AbstractStructuredType) af, pm, null);
594+
}
582595
return af;
583596
}
584597
}
@@ -618,7 +631,7 @@ private AbstractStructuredType parseLiDescription(XMPMetadata xmp, QName descrip
618631
if (liDescriptionElementChildren.isEmpty())
619632
{
620633
// The list is empty
621-
return null;
634+
return tryParseAttributesAsProperties(xmp, liDescriptionElement, tm, null, null, descriptor);
622635
}
623636
// Instantiate abstract structured type with hint from first element
624637
Element firstLiDescriptionElementChild = liDescriptionElementChildren.get(0);
@@ -708,6 +721,7 @@ else if (type.type().isStructured())
708721
}
709722

710723
}
724+
ast = tryParseAttributesAsProperties(xmp, liDescriptionElement, tm, ast, pm, descriptor);
711725
nsFinder.pop();
712726
return ast;
713727
}
@@ -952,6 +966,95 @@ private PropertyType checkPropertyDefinition(XMPMetadata xmp, QName prop) throws
952966
}
953967
}
954968

969+
/**
970+
* This attempts to run the same logic as in parseLiDescription() but with simple attributes
971+
* that will be treated like children. This is inspired by loadAttributes() and
972+
* parseDescriptionRootAttr(). This solves the problem in PDFBOX-3882 where properties appear as
973+
* attributes in places lower than the descriptor root.
974+
*
975+
* @param xmp
976+
* @param liElement
977+
* @param tm
978+
* @param ast An AbstractStructuredType object, can be null.
979+
* @param pm A PropertiesDescription object, must be set if ast is not null.
980+
* @param qName QName of the parent, will be used if instanciating an AbstractStructuredType
981+
* object, must be set if ast is not null.
982+
* @return An AbstractStructuredType, possibly created here if it was null as parameter.
983+
* @throws XmpParsingException
984+
*/
985+
private AbstractStructuredType tryParseAttributesAsProperties(XMPMetadata xmp, Element liElement,
986+
TypeMapping tm, AbstractStructuredType ast, PropertiesDescription pm, QName qName) throws XmpParsingException
987+
{
988+
NamedNodeMap attributes = liElement.getAttributes();
989+
if (attributes == null)
990+
{
991+
return ast;
992+
}
993+
for (int i = 0; i < attributes.getLength(); ++i)
994+
{
995+
Attr attr = (Attr) attributes.item(i);
996+
if (XMLConstants.XMLNS_ATTRIBUTE.equals(attr.getPrefix()))
997+
{
998+
// do nothing
999+
}
1000+
else if (XmpConstants.DEFAULT_RDF_PREFIX.equals(attr.getPrefix())
1001+
&& XmpConstants.ABOUT_NAME.equals(attr.getLocalName()))
1002+
{
1003+
// do nothing (maybe later?)
1004+
}
1005+
else if (XMLConstants.XML_NS_URI.equals(attr.getNamespaceURI()))
1006+
{
1007+
// do nothing
1008+
}
1009+
else if (XmpConstants.DEFAULT_RDF_PREFIX.equals(attr.getPrefix()))
1010+
{
1011+
// other rdf stuff, e.g. rdf:parseType
1012+
}
1013+
else
1014+
{
1015+
if (ast == null && attr.getNamespaceURI() != null) // What to do if attr.getNamespaceURI() is null?
1016+
{
1017+
// like in parseLiDescription():
1018+
// Instantiate abstract structured type with hint from first element
1019+
QName attrQName = new QName(attr.getNamespaceURI(), attr.getLocalName(), attr.getPrefix());
1020+
PropertyType ctype = checkPropertyDefinition(xmp, attrQName);
1021+
// PDFBOX-2318, PDFBOX-6106: Default to text if no type is found
1022+
if (ctype == null)
1023+
{
1024+
if (strictParsing)
1025+
{
1026+
throw new XmpParsingException(ErrorType.InvalidType, "No type defined for {" + attr.getNamespaceURI() + "}"
1027+
+ attr.getLocalName());
1028+
}
1029+
else
1030+
{
1031+
ctype = TypeMapping.createPropertyType(Types.Text, Cardinality.Simple);
1032+
}
1033+
}
1034+
Types tt = ctype.type();
1035+
ast = instanciateStructured(tm, tt, qName.getLocalPart(), attr.getNamespaceURI());
1036+
if (tt.isStructured())
1037+
{
1038+
pm = tm.getStructuredPropMapping(tt);
1039+
}
1040+
else
1041+
{
1042+
pm = tm.getDefinedDescriptionByNamespace(attr.getNamespaceURI());
1043+
}
1044+
}
1045+
if (ast != null && pm != null && attr.getNamespaceURI() != null)
1046+
{
1047+
PropertyType type = pm.getPropertyType(attr.getLocalName());
1048+
AbstractSimpleProperty asp = tm.instanciateSimpleProperty(
1049+
attr.getNamespaceURI(), attr.getPrefix(), attr.getLocalName(),
1050+
attr.getValue(), type.type());
1051+
ast.getContainer().addProperty(asp);
1052+
}
1053+
}
1054+
}
1055+
return ast;
1056+
}
1057+
9551058
protected static class NamespaceFinder
9561059
{
9571060
private final Deque<Map<String, String>> stack = new ArrayDeque<>();

0 commit comments

Comments
 (0)