@@ -37,9 +37,22 @@ static std::string_view extract_XMP_content(const string &xmp_blob)
3737 return sv.substr (first, last - first + 1 );
3838}
3939
40- json parse_xml_element (const XMLElement *element)
40+ json parse_xml_element (const XMLElement *element, const std::string &parent_ns = " " )
4141{
4242 json result;
43+
44+ // Determine this element's namespace
45+ std::string element_name = element->Name ();
46+ size_t element_colon_pos = element_name.find (' :' );
47+ std::string current_ns;
48+
49+ // Special case: rdf:Description and rdf:li don't establish a new namespace context
50+ // They inherit the namespace from their parent
51+ if (element_name == " rdf:Description" || element_name == " rdf:li" || element_colon_pos == std::string::npos)
52+ current_ns = parent_ns;
53+ else // if it's not one of the above elements and it has a prefix, use it
54+ current_ns = element_name.substr (0 , element_colon_pos);
55+
4356 // Parse attributes into the current object
4457 const XMLAttribute *attr = element->FirstAttribute ();
4558 while (attr)
@@ -52,10 +65,8 @@ json parse_xml_element(const XMLElement *element)
5265 if (attr_name == " xml:lang" )
5366 result[attr_name] = attr_value;
5467 else if (attr_name.find (" stEvt:" ) == 0 || attr_name.find (" stRef:" ) == 0 )
55- {
5668 // Special case: Adobe event structure - keep as flat key/value pairs
5769 result[attr_name.substr (6 )] = attr_value;
58- }
5970 else
6071 {
6172 // Extract namespace prefix
@@ -64,19 +75,26 @@ json parse_xml_element(const XMLElement *element)
6475 {
6576 std::string ns_prefix = attr_name.substr (0 , colon_pos);
6677 std::string local_name = attr_name.substr (colon_pos + 1 );
67- if (!result.contains (ns_prefix))
78+
79+ // Flatten if attribute namespace matches current element's namespace (especially for crs)
80+ if (ns_prefix == current_ns && !current_ns.empty ())
81+ result[local_name] = attr_value;
82+ else
6883 {
69- result[ns_prefix] = json::object ();
84+ if (!result.contains (ns_prefix))
85+ result[ns_prefix] = json::object ();
86+ result[ns_prefix][local_name] = attr_value;
7087 }
71- result[ns_prefix][local_name] = attr_value;
7288 }
7389 else
74- {
7590 result[attr_name] = attr_value;
76- }
7791 }
7892 attr = attr->Next ();
7993 }
94+
95+ // Parse child elements
96+ const XMLElement *child = element->FirstChildElement ();
97+
8098 // Handle text content
8199 const char *text = element->GetText ();
82100 if (text && strlen (text) > 0 )
@@ -97,16 +115,10 @@ json parse_xml_element(const XMLElement *element)
97115 // Otherwise just return the text
98116 return text_str;
99117 }
100- }
101- // Parse child elements
102- const XMLElement *child = element->FirstChildElement ();
103- // If no children and no attributes, check for text again
104- if (!child && result.empty ())
105- {
106- const char *text2 = element->GetText ();
107- if (text2 && strlen (text2) > 0 )
118+ else if (!child && result.empty ())
108119 {
109- return std::string (text2);
120+ // All whitespace text, no children, no attributes: return untrimmed
121+ return std::string (text);
110122 }
111123 }
112124
@@ -121,7 +133,7 @@ json parse_xml_element(const XMLElement *element)
121133 const XMLElement *item = child->FirstChildElement ();
122134 while (item)
123135 {
124- json item_json = parse_xml_element (item);
136+ json item_json = parse_xml_element (item, current_ns );
125137 seq_array.push_back (item_json);
126138 item = item->NextSiblingElement ();
127139 }
@@ -133,7 +145,7 @@ json parse_xml_element(const XMLElement *element)
133145 {
134146 std::string child_name = child->Name ();
135147 size_t colon_pos = child_name.find (' :' );
136- json child_json = parse_xml_element (child);
148+ json child_json = parse_xml_element (child, current_ns );
137149
138150 // Special handling for rdf:Description - skip the wrapper and merge contents directly
139151 if (child_name == " rdf:Description" )
@@ -168,19 +180,10 @@ json parse_xml_element(const XMLElement *element)
168180 {
169181 std::string ns_prefix = child_name.substr (0 , colon_pos);
170182 std::string local_name = child_name.substr (colon_pos + 1 );
171-
172- // if (ns_prefix == "crs")
173- // {
174- // // if (result.is_null())
175- // // result = json::object();
176-
177- // spdlog::info("Found crs element: {}:{}", ns_prefix, local_name);
178- // if (child_json.is_string())
179- // result[local_name] = child_json.get<std::string>();
180- // else
181- // result[local_name] = child_json;
182- // }
183- // else
183+ // Flatten if child namespace matches current element's namespace (especially for crs)
184+ if (ns_prefix == current_ns && !current_ns.empty ())
185+ result[local_name] = child_json;
186+ else
184187 {
185188 if (!result.contains (ns_prefix))
186189 result[ns_prefix] = json::object ();
0 commit comments