Skip to content

Commit 2195df9

Browse files
committed
Fixed xml encoding of ProcInst #1563, improved XML comment handling
1 parent fdb1487 commit 2195df9

File tree

4 files changed

+183
-34
lines changed

4 files changed

+183
-34
lines changed

pkg/yqlib/decoder_xml.go

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import (
77
"errors"
88
"fmt"
99
"io"
10+
"regexp"
1011
"strings"
1112
"unicode"
1213

@@ -48,11 +49,19 @@ func (dec *xmlDecoder) createSequence(nodes []*xmlNode) (*yaml.Node, error) {
4849
return yamlNode, nil
4950
}
5051

52+
var decoderCommentPrefix = regexp.MustCompile(`(^|\n)([[:alpha:]])`)
53+
5154
func (dec *xmlDecoder) processComment(c string) string {
5255
if c == "" {
5356
return ""
5457
}
55-
return "#" + strings.TrimRight(c, " ")
58+
//need to replace "cat " with "# cat"
59+
// "\ncat\n" with "\n cat\n"
60+
// ensure non-empty comments starting with newline have a space in front
61+
62+
replacement := decoderCommentPrefix.ReplaceAllString(c, "$1 $2")
63+
replacement = "#" + strings.ReplaceAll(strings.TrimRight(replacement, " "), "\n", "\n#")
64+
return replacement
5665
}
5766

5867
func (dec *xmlDecoder) createMap(n *xmlNode) (*yaml.Node, error) {
@@ -77,6 +86,7 @@ func (dec *xmlDecoder) createMap(n *xmlNode) (*yaml.Node, error) {
7786
var err error
7887

7988
if i == 0 {
89+
log.Debugf("head comment here")
8090
labelNode.HeadComment = dec.processComment(n.HeadComment)
8191

8292
}

pkg/yqlib/doc/usage/xml.md

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -407,8 +407,10 @@ A best attempt is made to copy comments to xml.
407407

408408
Given a sample.yml file of:
409409
```yaml
410+
#
410411
# header comment
411412
# above_cat
413+
#
412414
cat: # inline_cat
413415
# above_array
414416
array: # inline_array
@@ -425,9 +427,11 @@ yq -o=xml '.' sample.yml
425427
will output
426428
```xml
427429
<!--
428-
header comment
429-
above_cat
430-
--><!-- inline_cat --><cat><!-- above_array inline_array -->
430+
header comment
431+
above_cat
432+
-->
433+
<!-- inline_cat -->
434+
<cat><!-- above_array inline_array -->
431435
<array>val1<!-- inline_val1 --></array>
432436
<array><!-- above_val2 -->val2<!-- inline_val2 --></array>
433437
</cat><!-- below_cat -->
@@ -489,7 +493,8 @@ yq -p=xml -o=xml '.' sample.xml
489493
```
490494
will output
491495
```xml
492-
<!-- before cat --><cat><!-- in cat before -->
496+
<!-- before cat -->
497+
<cat><!-- in cat before -->
493498
<x>3<!-- multi
494499
line comment
495500
for x --></x><!-- before y -->

pkg/yqlib/encoder_xml.go

Lines changed: 60 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,6 @@ type xmlEncoder struct {
1919
leadingContent string
2020
}
2121

22-
var commentPrefix = regexp.MustCompile(`(^|\n)\s*#`)
23-
2422
func NewXMLEncoder(indent int, prefs XmlPreferences) Encoder {
2523
var indentString = ""
2624

@@ -39,7 +37,7 @@ func (e *xmlEncoder) PrintDocumentSeparator(writer io.Writer) error {
3937
}
4038

4139
func (e *xmlEncoder) PrintLeadingContent(writer io.Writer, content string) error {
42-
e.leadingContent = commentPrefix.ReplaceAllString(content, "\n")
40+
e.leadingContent = content
4341
return nil
4442
}
4543

@@ -48,12 +46,39 @@ func (e *xmlEncoder) Encode(writer io.Writer, node *yaml.Node) error {
4846
// hack so we can manually add newlines to procInst and directives
4947
e.writer = writer
5048
encoder.Indent("", e.indentString)
49+
var newLine xml.CharData = []byte("\n")
50+
51+
mapNode := unwrapDoc(node)
52+
if mapNode.Tag == "!!map" {
53+
// make sure <?xml .. ?> processing instructions are encoded first
54+
for i := 0; i < len(mapNode.Content); i += 2 {
55+
key := mapNode.Content[i]
56+
value := mapNode.Content[i+1]
57+
58+
if key.Value == (e.prefs.ProcInstPrefix + "xml") {
59+
name := strings.Replace(key.Value, e.prefs.ProcInstPrefix, "", 1)
60+
procInst := xml.ProcInst{Target: name, Inst: []byte(value.Value)}
61+
if err := encoder.EncodeToken(procInst); err != nil {
62+
return err
63+
}
64+
if _, err := e.writer.Write([]byte("\n")); err != nil {
65+
log.Warning("Unable to write newline, skipping: %w", err)
66+
}
67+
}
68+
}
69+
}
5170

5271
if e.leadingContent != "" {
72+
73+
// remove first and last newlines if present
5374
err := e.encodeComment(encoder, e.leadingContent)
5475
if err != nil {
5576
return err
5677
}
78+
err = encoder.EncodeToken(newLine)
79+
if err != nil {
80+
return err
81+
}
5782
}
5883

5984
switch node.Kind {
@@ -89,29 +114,12 @@ func (e *xmlEncoder) Encode(writer io.Writer, node *yaml.Node) error {
89114
default:
90115
return fmt.Errorf("unsupported type %v", node.Tag)
91116
}
92-
var charData xml.CharData = []byte("\n")
93-
return encoder.EncodeToken(charData)
117+
118+
return encoder.EncodeToken(newLine)
94119

95120
}
96121

97122
func (e *xmlEncoder) encodeTopLevelMap(encoder *xml.Encoder, node *yaml.Node) error {
98-
// make sure <?xml .. ?> processing instructions are encoded first
99-
for i := 0; i < len(node.Content); i += 2 {
100-
key := node.Content[i]
101-
value := node.Content[i+1]
102-
103-
if key.Value == (e.prefs.ProcInstPrefix + "xml") {
104-
name := strings.Replace(key.Value, e.prefs.ProcInstPrefix, "", 1)
105-
procInst := xml.ProcInst{Target: name, Inst: []byte(value.Value)}
106-
if err := encoder.EncodeToken(procInst); err != nil {
107-
return err
108-
}
109-
if _, err := e.writer.Write([]byte("\n")); err != nil {
110-
log.Warning("Unable to write newline, skipping: %w", err)
111-
}
112-
}
113-
}
114-
115123
err := e.encodeComment(encoder, headAndLineComment(node))
116124
if err != nil {
117125
return err
@@ -126,6 +134,13 @@ func (e *xmlEncoder) encodeTopLevelMap(encoder *xml.Encoder, node *yaml.Node) er
126134
if err != nil {
127135
return err
128136
}
137+
if headAndLineComment(key) != "" {
138+
var newLine xml.CharData = []byte("\n")
139+
err = encoder.EncodeToken(newLine)
140+
if err != nil {
141+
return err
142+
}
143+
}
129144

130145
if key.Value == (e.prefs.ProcInstPrefix + "xml") {
131146
// dont double process these.
@@ -206,12 +221,33 @@ func (e *xmlEncoder) doEncode(encoder *xml.Encoder, node *yaml.Node, start xml.S
206221
return fmt.Errorf("unsupported type %v", node.Tag)
207222
}
208223

224+
var xmlEncodeMultilineCommentRegex = regexp.MustCompile(`(^|\n) *# ?(.*)`)
225+
var xmlEncodeSingleLineCommentRegex = regexp.MustCompile(`^\s*#(.*)\n?`)
226+
var chompRegexp = regexp.MustCompile(`\n$`)
227+
209228
func (e *xmlEncoder) encodeComment(encoder *xml.Encoder, commentStr string) error {
210229
if commentStr != "" {
211-
log.Debugf("encoding comment %v", commentStr)
212-
if !strings.HasSuffix(commentStr, " ") {
230+
log.Debugf("got comment [%v]", commentStr)
231+
// multi line string
232+
if len(commentStr) > 2 && strings.Contains(commentStr[1:len(commentStr)-1], "\n") {
233+
commentStr = chompRegexp.ReplaceAllString(commentStr, "")
234+
log.Debugf("chompRegexp [%v]", commentStr)
235+
commentStr = xmlEncodeMultilineCommentRegex.ReplaceAllString(commentStr, "$1$2")
236+
log.Debugf("processed multine [%v]", commentStr)
237+
// if the first line is non blank, add a space
238+
if commentStr[0] != '\n' && commentStr[0] != ' ' {
239+
commentStr = " " + commentStr
240+
}
241+
242+
} else {
243+
commentStr = xmlEncodeSingleLineCommentRegex.ReplaceAllString(commentStr, "$1")
244+
}
245+
246+
if !strings.HasSuffix(commentStr, " ") && !strings.HasSuffix(commentStr, "\n") {
213247
commentStr = commentStr + " "
248+
log.Debugf("added suffix [%v]", commentStr)
214249
}
250+
log.Debugf("encoding comment [%v]", commentStr)
215251

216252
var comment xml.Comment = []byte(commentStr)
217253
err := encoder.EncodeToken(comment)

pkg/yqlib/xml_test.go

Lines changed: 103 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,29 @@ import (
1010
"github.com/mikefarah/yq/v4/test"
1111
)
1212

13+
const yamlInputWithProcInstAndHeadComment = `# cats
14+
+p_xml: version="1.0"
15+
this: is some xml`
16+
17+
const expectedXmlProcInstAndHeadComment = `<?xml version="1.0"?>
18+
<!-- cats -->
19+
<this>is some xml</this>
20+
`
21+
22+
const xmlProcInstAndHeadCommentBlock = `<?xml version="1.0"?>
23+
<!--
24+
cats
25+
-->
26+
<this>is some xml</this>
27+
`
28+
29+
const expectedYamlProcInstAndHeadCommentBlock = `#
30+
# cats
31+
#
32+
+p_xml: version="1.0"
33+
this: is some xml
34+
`
35+
1336
const inputXMLWithComments = `
1437
<!-- before cat -->
1538
<cat>
@@ -128,7 +151,8 @@ cat:
128151
# after cat
129152
`
130153

131-
const expectedRoundtripXMLWithComments = `<!-- before cat --><cat><!-- in cat before -->
154+
const expectedRoundtripXMLWithComments = `<!-- before cat -->
155+
<cat><!-- in cat before -->
132156
<x>3<!-- multi
133157
line comment
134158
for x --></x><!-- before y -->
@@ -139,8 +163,10 @@ in d before -->
139163
</cat><!-- after cat -->
140164
`
141165

142-
const yamlWithComments = `# header comment
166+
const yamlWithComments = `#
167+
# header comment
143168
# above_cat
169+
#
144170
cat: # inline_cat
145171
# above_array
146172
array: # inline_array
@@ -151,9 +177,11 @@ cat: # inline_cat
151177
`
152178

153179
const expectedXMLWithComments = `<!--
154-
header comment
155-
above_cat
156-
--><!-- inline_cat --><cat><!-- above_array inline_array -->
180+
header comment
181+
above_cat
182+
-->
183+
<!-- inline_cat -->
184+
<cat><!-- above_array inline_array -->
157185
<array>val1<!-- inline_val1 --></array>
158186
<array><!-- above_val2 -->val2<!-- inline_val2 --></array>
159187
</cat><!-- below_cat -->
@@ -266,6 +294,41 @@ var xmlScenarios = []formatScenario{
266294
input: "<root><cats><cat>quick</cat><cat>soft</cat><!-- kitty_comment--><cat>squishy</cat></cats></root>",
267295
expected: "root:\n cats:\n cat:\n - quick\n - soft\n # kitty_comment\n\n - squishy\n",
268296
},
297+
{
298+
description: "ProcInst with head comment",
299+
skipDoc: true,
300+
input: yamlInputWithProcInstAndHeadComment,
301+
expected: expectedXmlProcInstAndHeadComment,
302+
scenarioType: "encode",
303+
},
304+
{
305+
description: "ProcInst with head comment round trip",
306+
skipDoc: true,
307+
input: expectedXmlProcInstAndHeadComment,
308+
expected: expectedXmlProcInstAndHeadComment,
309+
scenarioType: "roundtrip",
310+
},
311+
{
312+
description: "ProcInst with block head comment to yaml",
313+
skipDoc: true,
314+
input: xmlProcInstAndHeadCommentBlock,
315+
expected: expectedYamlProcInstAndHeadCommentBlock,
316+
scenarioType: "decode",
317+
},
318+
{
319+
description: "ProcInst with block head comment from yaml",
320+
skipDoc: true,
321+
input: expectedYamlProcInstAndHeadCommentBlock,
322+
expected: xmlProcInstAndHeadCommentBlock,
323+
scenarioType: "encode",
324+
},
325+
{
326+
description: "ProcInst with head comment round trip block",
327+
skipDoc: true,
328+
input: xmlProcInstAndHeadCommentBlock,
329+
expected: xmlProcInstAndHeadCommentBlock,
330+
scenarioType: "roundtrip",
331+
},
269332
{
270333
description: "Parse xml: simple",
271334
subdescription: "Notice how all the values are strings, see the next example on how you can fix that.",
@@ -466,6 +529,41 @@ var xmlScenarios = []formatScenario{
466529
expected: "<cat name=\"tiger\">cool</cat>\n",
467530
scenarioType: "encode",
468531
},
532+
{
533+
description: "round trip multiline 1",
534+
skipDoc: true,
535+
input: "<x><!-- cats --></x>\n",
536+
expected: "<x><!-- cats --></x>\n",
537+
scenarioType: "roundtrip",
538+
},
539+
{
540+
description: "round trip multiline 2",
541+
skipDoc: true,
542+
input: "<x><!--\n cats\n --></x>\n",
543+
expected: "<x><!--\ncats\n--></x>\n",
544+
scenarioType: "roundtrip",
545+
},
546+
{
547+
description: "round trip multiline 3",
548+
skipDoc: true,
549+
input: "<x><!--\n\tcats\n --></x>\n",
550+
expected: "<x><!--\n\tcats\n--></x>\n",
551+
scenarioType: "roundtrip",
552+
},
553+
{
554+
description: "round trip multiline 4",
555+
skipDoc: true,
556+
input: "<x><!--\n\tcats\n\tdogs\n--></x>\n",
557+
expected: "<x><!--\n\tcats\n\tdogs\n--></x>\n",
558+
scenarioType: "roundtrip",
559+
},
560+
{
561+
description: "round trip multiline 5",
562+
skipDoc: true, // pity spaces aren't kept atm.
563+
input: "<x><!--\ncats\ndogs\n--></x>\n",
564+
expected: "<x><!--\ncats\ndogs\n--></x>\n",
565+
scenarioType: "roundtrip",
566+
},
469567
{
470568
description: "Encode xml: comments",
471569
subdescription: "A best attempt is made to copy comments to xml.",

0 commit comments

Comments
 (0)