@@ -579,6 +579,19 @@ private AbstractField parseLiElement(XMPMetadata xmp, QName descriptor, Element
579579 throw new XmpParsingException (ErrorType .InvalidType , "Parsing of structured type failed" , ex );
580580 }
581581 loadAttributes (af , liElement );
582+ if (af instanceof AbstractStructuredType )
583+ {
584+ PropertiesDescription pm ;
585+ if (type .isStructured ())
586+ {
587+ pm = tm .getStructuredPropMapping (type );
588+ }
589+ else
590+ {
591+ pm = tm .getDefinedDescriptionByNamespace (liElement .getNamespaceURI ());
592+ }
593+ af = tryParseAttributesAsProperties (xmp , liElement , tm , (AbstractStructuredType ) af , pm , null );
594+ }
582595 return af ;
583596 }
584597 }
@@ -618,7 +631,7 @@ private AbstractStructuredType parseLiDescription(XMPMetadata xmp, QName descrip
618631 if (liDescriptionElementChildren .isEmpty ())
619632 {
620633 // The list is empty
621- return null ;
634+ return tryParseAttributesAsProperties ( xmp , liDescriptionElement , tm , null , null , descriptor ) ;
622635 }
623636 // Instantiate abstract structured type with hint from first element
624637 Element firstLiDescriptionElementChild = liDescriptionElementChildren .get (0 );
@@ -708,6 +721,7 @@ else if (type.type().isStructured())
708721 }
709722
710723 }
724+ ast = tryParseAttributesAsProperties (xmp , liDescriptionElement , tm , ast , pm , descriptor );
711725 nsFinder .pop ();
712726 return ast ;
713727 }
@@ -952,6 +966,95 @@ private PropertyType checkPropertyDefinition(XMPMetadata xmp, QName prop) throws
952966 }
953967 }
954968
969+ /**
970+ * This attempts to run the same logic as in parseLiDescription() but with simple attributes
971+ * that will be treated like children. This is inspired by loadAttributes() and
972+ * parseDescriptionRootAttr(). This solves the problem in PDFBOX-3882 where properties appear as
973+ * attributes in places lower than the descriptor root.
974+ *
975+ * @param xmp
976+ * @param liElement
977+ * @param tm
978+ * @param ast An AbstractStructuredType object, can be null.
979+ * @param pm A PropertiesDescription object, must be set if ast is not null.
980+ * @param qName QName of the parent, will be used if instanciating an AbstractStructuredType
981+ * object, must be set if ast is not null.
982+ * @return An AbstractStructuredType, possibly created here if it was null as parameter.
983+ * @throws XmpParsingException
984+ */
985+ private AbstractStructuredType tryParseAttributesAsProperties (XMPMetadata xmp , Element liElement ,
986+ TypeMapping tm , AbstractStructuredType ast , PropertiesDescription pm , QName qName ) throws XmpParsingException
987+ {
988+ NamedNodeMap attributes = liElement .getAttributes ();
989+ if (attributes == null )
990+ {
991+ return ast ;
992+ }
993+ for (int i = 0 ; i < attributes .getLength (); ++i )
994+ {
995+ Attr attr = (Attr ) attributes .item (i );
996+ if (XMLConstants .XMLNS_ATTRIBUTE .equals (attr .getPrefix ()))
997+ {
998+ // do nothing
999+ }
1000+ else if (XmpConstants .DEFAULT_RDF_PREFIX .equals (attr .getPrefix ())
1001+ && XmpConstants .ABOUT_NAME .equals (attr .getLocalName ()))
1002+ {
1003+ // do nothing (maybe later?)
1004+ }
1005+ else if (XMLConstants .XML_NS_URI .equals (attr .getNamespaceURI ()))
1006+ {
1007+ // do nothing
1008+ }
1009+ else if (XmpConstants .DEFAULT_RDF_PREFIX .equals (attr .getPrefix ()))
1010+ {
1011+ // other rdf stuff, e.g. rdf:parseType
1012+ }
1013+ else
1014+ {
1015+ if (ast == null && attr .getNamespaceURI () != null ) // What to do if attr.getNamespaceURI() is null?
1016+ {
1017+ // like in parseLiDescription():
1018+ // Instantiate abstract structured type with hint from first element
1019+ QName attrQName = new QName (attr .getNamespaceURI (), attr .getLocalName (), attr .getPrefix ());
1020+ PropertyType ctype = checkPropertyDefinition (xmp , attrQName );
1021+ // PDFBOX-2318, PDFBOX-6106: Default to text if no type is found
1022+ if (ctype == null )
1023+ {
1024+ if (strictParsing )
1025+ {
1026+ throw new XmpParsingException (ErrorType .InvalidType , "No type defined for {" + attr .getNamespaceURI () + "}"
1027+ + attr .getLocalName ());
1028+ }
1029+ else
1030+ {
1031+ ctype = TypeMapping .createPropertyType (Types .Text , Cardinality .Simple );
1032+ }
1033+ }
1034+ Types tt = ctype .type ();
1035+ ast = instanciateStructured (tm , tt , qName .getLocalPart (), attr .getNamespaceURI ());
1036+ if (tt .isStructured ())
1037+ {
1038+ pm = tm .getStructuredPropMapping (tt );
1039+ }
1040+ else
1041+ {
1042+ pm = tm .getDefinedDescriptionByNamespace (attr .getNamespaceURI ());
1043+ }
1044+ }
1045+ if (ast != null && pm != null && attr .getNamespaceURI () != null )
1046+ {
1047+ PropertyType type = pm .getPropertyType (attr .getLocalName ());
1048+ AbstractSimpleProperty asp = tm .instanciateSimpleProperty (
1049+ attr .getNamespaceURI (), attr .getPrefix (), attr .getLocalName (),
1050+ attr .getValue (), type .type ());
1051+ ast .getContainer ().addProperty (asp );
1052+ }
1053+ }
1054+ }
1055+ return ast ;
1056+ }
1057+
9551058 protected static class NamespaceFinder
9561059 {
9571060 private final Deque <Map <String , String >> stack = new ArrayDeque <>();
0 commit comments