Skip to content

Commit 02be2b2

Browse files
committed
Fixed issue where content surrounding tags is lost #1447
1 parent 43233ce commit 02be2b2

File tree

4 files changed

+77
-14
lines changed

4 files changed

+77
-14
lines changed

pkg/yqlib/decoder_xml.go

Lines changed: 39 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -53,15 +53,16 @@ func (dec *xmlDecoder) processComment(c string) string {
5353
}
5454

5555
func (dec *xmlDecoder) createMap(n *xmlNode) (*yaml.Node, error) {
56-
log.Debug("createMap: headC: %v, footC: %v", n.HeadComment, n.FootComment)
56+
log.Debug("createMap: headC: %v, lineC: %v, footC: %v", n.HeadComment, n.LineComment, n.FootComment)
5757
yamlNode := &yaml.Node{Kind: yaml.MappingNode, Tag: "!!map"}
5858

5959
if len(n.Data) > 0 {
6060
label := dec.prefs.ContentName
6161
labelNode := createScalarNode(label, label)
6262
labelNode.HeadComment = dec.processComment(n.HeadComment)
63+
labelNode.LineComment = dec.processComment(n.LineComment)
6364
labelNode.FootComment = dec.processComment(n.FootComment)
64-
yamlNode.Content = append(yamlNode.Content, labelNode, createScalarNode(n.Data, n.Data))
65+
yamlNode.Content = append(yamlNode.Content, labelNode, dec.createValueNodeFromData(n.Data))
6566
}
6667

6768
for i, keyValuePair := range n.Children {
@@ -89,6 +90,7 @@ func (dec *xmlDecoder) createMap(n *xmlNode) (*yaml.Node, error) {
8990
// if the value is a scalar, the head comment of the scalar needs to go on the key?
9091
// add tests for <z/> as well as multiple <ds> of inputXmlWithComments > yaml
9192
if len(children[0].Children) == 0 && children[0].HeadComment != "" {
93+
log.Debug("scalar comment hack")
9294
labelNode.HeadComment = labelNode.HeadComment + "\n" + strings.TrimSpace(children[0].HeadComment)
9395
children[0].HeadComment = ""
9496
}
@@ -103,17 +105,40 @@ func (dec *xmlDecoder) createMap(n *xmlNode) (*yaml.Node, error) {
103105
return yamlNode, nil
104106
}
105107

108+
func (dec *xmlDecoder) createValueNodeFromData(values []string) *yaml.Node {
109+
switch len(values) {
110+
case 0:
111+
return createScalarNode(nil, "")
112+
case 1:
113+
return createScalarNode(values[0], values[0])
114+
default:
115+
content := make([]*yaml.Node, 0)
116+
for _, value := range values {
117+
content = append(content, createScalarNode(value, value))
118+
}
119+
return &yaml.Node{
120+
Kind: yaml.SequenceNode,
121+
Tag: "!!seq",
122+
Content: content,
123+
}
124+
}
125+
}
126+
106127
func (dec *xmlDecoder) convertToYamlNode(n *xmlNode) (*yaml.Node, error) {
107128
if len(n.Children) > 0 {
108129
return dec.createMap(n)
109130
}
110-
scalar := createScalarNode(n.Data, n.Data)
111-
if n.Data == "" {
112-
scalar = createScalarNode(nil, "")
113-
}
114-
log.Debug("scalar headC: %v, footC: %v", n.HeadComment, n.FootComment)
131+
132+
scalar := dec.createValueNodeFromData(n.Data)
133+
134+
log.Debug("scalar (%v), headC: %v, lineC: %v, footC: %v", scalar.Tag, n.HeadComment, n.LineComment, n.FootComment)
115135
scalar.HeadComment = dec.processComment(n.HeadComment)
116136
scalar.LineComment = dec.processComment(n.LineComment)
137+
if scalar.Tag == "!!seq" {
138+
scalar.Content[0].HeadComment = scalar.LineComment
139+
scalar.LineComment = ""
140+
}
141+
117142
scalar.FootComment = dec.processComment(n.FootComment)
118143

119144
return scalar, nil
@@ -156,7 +181,7 @@ type xmlNode struct {
156181
HeadComment string
157182
FootComment string
158183
LineComment string
159-
Data string
184+
Data []string
160185
}
161186

162187
type xmlChildrenKv struct {
@@ -241,12 +266,13 @@ func (dec *xmlDecoder) decodeXML(root *xmlNode) error {
241266
a.Name.Local = a.Name.Space + ":" + a.Name.Local
242267
}
243268
}
244-
elem.n.AddChild(dec.prefs.AttributePrefix+a.Name.Local, &xmlNode{Data: a.Value})
269+
elem.n.AddChild(dec.prefs.AttributePrefix+a.Name.Local, &xmlNode{Data: []string{a.Value}})
245270
}
246271
case xml.CharData:
247272
// Extract XML data (if any)
248-
elem.n.Data = elem.n.Data + trimNonGraphic(string(se))
249-
if elem.n.Data != "" {
273+
newBit := trimNonGraphic(string(se))
274+
if len(newBit) > 0 {
275+
elem.n.Data = append(elem.n.Data, newBit)
250276
elem.state = "chardata"
251277
log.Debug("chardata [%v] for %v", elem.n.Data, elem.label)
252278
}
@@ -276,11 +302,11 @@ func (dec *xmlDecoder) decodeXML(root *xmlNode) error {
276302

277303
case xml.ProcInst:
278304
if !dec.prefs.SkipProcInst {
279-
elem.n.AddChild(dec.prefs.ProcInstPrefix+se.Target, &xmlNode{Data: string(se.Inst)})
305+
elem.n.AddChild(dec.prefs.ProcInstPrefix+se.Target, &xmlNode{Data: []string{string(se.Inst)}})
280306
}
281307
case xml.Directive:
282308
if !dec.prefs.SkipDirectives {
283-
elem.n.AddChild(dec.prefs.DirectiveName, &xmlNode{Data: string(se)})
309+
elem.n.AddChild(dec.prefs.DirectiveName, &xmlNode{Data: []string{string(se)}})
284310
}
285311
}
286312
}

pkg/yqlib/doc/usage/xml.md

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,27 @@ cat:
152152
+@legs: "4"
153153
```
154154

155+
## Parse xml: content split between comments/children
156+
Multiple content texts are collected into a sequence.
157+
158+
Given a sample.xml file of:
159+
```xml
160+
<root> value <!-- comment-->anotherValue <a>frog</a> cool!</root>
161+
```
162+
then
163+
```bash
164+
yq -p=xml '.' sample.xml
165+
```
166+
will output
167+
```yaml
168+
root:
169+
+content: # comment
170+
- value
171+
- anotherValue
172+
- cool!
173+
a: frog
174+
```
175+
155176
## Parse xml: custom dtd
156177
DTD entities are processed as directives.
157178

pkg/yqlib/operators_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ type expressionScenario struct {
3131
}
3232

3333
func TestMain(m *testing.M) {
34-
logging.SetLevel(logging.ERROR, "")
34+
logging.SetLevel(logging.DEBUG, "")
3535
Now = func() time.Time {
3636
return time.Date(2021, time.May, 19, 1, 2, 3, 4, time.UTC)
3737
}

pkg/yqlib/xml_test.go

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -238,6 +238,16 @@ var xmlScenarios = []formatScenario{
238238
input: "<root>value<!-- comment--> </root>",
239239
expected: "root: value # comment\n",
240240
},
241+
{
242+
skipDoc: true,
243+
input: "<root> <!-- comment-->value</root>",
244+
expected: "\n# comment\nroot: value\n", //needs fix
245+
},
246+
{
247+
skipDoc: true,
248+
input: "<root>value<!-- comment-->anotherValue </root>",
249+
expected: "root:\n # comment\n - value\n - anotherValue\n",
250+
},
241251
{
242252
description: "Parse xml: simple",
243253
subdescription: "Notice how all the values are strings, see the next example on how you can fix that.",
@@ -269,6 +279,12 @@ var xmlScenarios = []formatScenario{
269279
input: "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<cat legs=\"4\">meow</cat>",
270280
expected: "+p_xml: version=\"1.0\" encoding=\"UTF-8\"\ncat:\n +content: meow\n +@legs: \"4\"\n",
271281
},
282+
{
283+
description: "Parse xml: content split between comments/children",
284+
subdescription: "Multiple content texts are collected into a sequence.",
285+
input: "<root> value <!-- comment-->anotherValue <a>frog</a> cool!</root>",
286+
expected: "root:\n +content: # comment\n - value\n - anotherValue\n - cool!\n a: frog\n",
287+
},
272288
{
273289
description: "Parse xml: custom dtd",
274290
subdescription: "DTD entities are processed as directives.",

0 commit comments

Comments
 (0)