Skip to content

Commit b9309a4

Browse files
authored
XML decoder additions (#1239)
* Add xml-keep-namespace and xml-raw-token features
* Add tests
* Change flags usage strings
* Append docs
1 parent 98193a7 commit b9309a4

File tree

11 files changed

+181
-21
lines changed

11 files changed

+181
-21
lines changed

cmd/constant.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@ var inputFormat = "yaml"
1111
var xmlAttributePrefix = "+"
1212
var xmlContentName = "+content"
1313
var xmlStrictMode = false
14+
// xmlKeepNamespace backs the --xml-keep-namespace flag; when true, the XML
// decoder prefixes attribute names with their namespace.
var xmlKeepNamespace = false
15+
// xmlUseRawToken backs the --xml-raw-token flag; when true, the XML decoder
// reads tokens with RawToken instead of Token.
var xmlUseRawToken = false
1416

1517
var exitStatus = false
1618
var forceColor = false

cmd/root.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,8 @@ yq -P sample.json
7272
rootCmd.PersistentFlags().StringVar(&xmlAttributePrefix, "xml-attribute-prefix", "+", "prefix for xml attributes")
7373
rootCmd.PersistentFlags().StringVar(&xmlContentName, "xml-content-name", "+content", "name for xml content (if no attribute name is present).")
7474
rootCmd.PersistentFlags().BoolVar(&xmlStrictMode, "xml-strict-mode", false, "enables strict parsing of XML. See https://pkg.go.dev/encoding/xml for more details.")
75+
rootCmd.PersistentFlags().BoolVar(&xmlKeepNamespace, "xml-keep-namespace", false, "enables keeping namespace after parsing attributes")
76+
// Registers --xml-raw-token (default false); the value is stored in
// xmlUseRawToken and handed to the XML decoder.
rootCmd.PersistentFlags().BoolVar(&xmlUseRawToken, "xml-raw-token", false, "enables using RawToken method instead of Token. Commonly disables namespace translations. See https://pkg.go.dev/encoding/xml#Decoder.RawToken for details.")
7577

7678
rootCmd.PersistentFlags().BoolVarP(&nullInput, "null-input", "n", false, "Don't read input, simply evaluate the expression given. Useful for creating docs from scratch.")
7779
rootCmd.PersistentFlags().BoolVarP(&noDocSeparators, "no-doc", "N", false, "Don't print document separators (---)")

cmd/utils.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ func configureDecoder() (yqlib.Decoder, error) {
6363
}
6464
switch yqlibInputFormat {
6565
case yqlib.XMLInputFormat:
66-
return yqlib.NewXMLDecoder(xmlAttributePrefix, xmlContentName, xmlStrictMode), nil
66+
return yqlib.NewXMLDecoder(xmlAttributePrefix, xmlContentName, xmlStrictMode, xmlKeepNamespace, xmlUseRawToken), nil
6767
case yqlib.PropertiesInputFormat:
6868
return yqlib.NewPropertiesDecoder(), nil
6969
}

go.sum

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,5 @@ gopkg.in/op/go-logging.v1 v1.0.0-20160211212156-b2cb9fa56473/go.mod h1:N1eN2tsCx
7272
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
7373
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
7474
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
75-
gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b h1:h8qDotaEPuJATrMmW04NCwg7v22aHH28wwpauUhK9Oo=
76-
gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
7775
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
7876
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

pkg/yqlib/decoder_xml.go

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,23 @@ type xmlDecoder struct {
1717
attributePrefix string
1818
contentName string
1919
strictMode bool
20+
keepNamespace bool
21+
useRawToken bool
2022
finished bool
2123
}
2224

23-
func NewXMLDecoder(attributePrefix string, contentName string, strictMode bool) Decoder {
25+
func NewXMLDecoder(attributePrefix string, contentName string, strictMode bool, keepNamespace bool, useRawToken bool) Decoder {
2426
if contentName == "" {
2527
contentName = "content"
2628
}
27-
return &xmlDecoder{attributePrefix: attributePrefix, contentName: contentName, finished: false, strictMode: strictMode}
29+
return &xmlDecoder{
30+
attributePrefix: attributePrefix,
31+
contentName: contentName,
32+
finished: false,
33+
strictMode: strictMode,
34+
keepNamespace: keepNamespace,
35+
useRawToken: useRawToken,
36+
}
2837
}
2938

3039
func (dec *xmlDecoder) Init(reader io.Reader) {
@@ -206,8 +215,15 @@ func (dec *xmlDecoder) decodeXML(root *xmlNode) error {
206215
n: root,
207216
}
208217

218+
getToken := func() (xml.Token, error) {
219+
if dec.useRawToken {
220+
return xmlDec.RawToken()
221+
}
222+
return xmlDec.Token()
223+
}
224+
209225
for {
210-
t, e := xmlDec.Token()
226+
t, e := getToken()
211227
if e != nil && !errors.Is(e, io.EOF) {
212228
return e
213229
}
@@ -228,6 +244,11 @@ func (dec *xmlDecoder) decodeXML(root *xmlNode) error {
228244

229245
// Extract attributes as children
230246
for _, a := range se.Attr {
247+
if dec.keepNamespace {
248+
if a.Name.Space != "" {
249+
a.Name.Local = a.Name.Space + ":" + a.Name.Local
250+
}
251+
}
231252
elem.n.AddChild(dec.attributePrefix+a.Name.Local, &xmlNode{Data: a.Value})
232253
}
233254
case xml.CharData:

pkg/yqlib/doc/usage/xml.md

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,52 @@ cat:
192192
# after cat
193193
```
194194

195+
## Parse xml: keep attribute namespace
196+
Given a sample.xml file of:
197+
```xml
198+
199+
<?xml version="1.0"?>
200+
<map xmlns="some-namespace" xmlns:xsi="some-instance" xsi:schemaLocation="some-url">
201+
</map>
202+
203+
```
204+
then
205+
```bash
206+
yq -p=xml -o=xml --xml-keep-namespace '.' sample.xml
207+
```
208+
will output
209+
```xml
210+
<map xmlns="some-namespace" xmlns:xsi="some-instance" some-instance:schemaLocation="some-url"></map>
211+
```
212+
213+
instead of
214+
```xml
215+
<map xmlns="some-namespace" xsi="some-instance" schemaLocation="some-url"></map>
216+
```
217+
218+
## Parse xml: keep raw attribute namespace
219+
Given a sample.xml file of:
220+
```xml
221+
222+
<?xml version="1.0"?>
223+
<map xmlns="some-namespace" xmlns:xsi="some-instance" xsi:schemaLocation="some-url">
224+
</map>
225+
226+
```
227+
then
228+
```bash
229+
yq -p=xml -o=xml --xml-keep-namespace --xml-raw-token '.' sample.xml
230+
```
231+
will output
232+
```xml
233+
<map xmlns="some-namespace" xmlns:xsi="some-instance" xsi:schemaLocation="some-url"></map>
234+
```
235+
236+
instead of
237+
```xml
238+
<map xmlns="some-namespace" xsi="some-instance" schemaLocation="some-url"></map>
239+
```
240+
195241
## Encode xml: simple
196242
Given a sample.yml file of:
197243
```yaml

pkg/yqlib/encoder_xml.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ import (
99
yaml "gopkg.in/yaml.v3"
1010
)
1111

12-
var XMLPreferences = xmlPreferences{AttributePrefix: "+", ContentName: "+content", StrictMode: false}
12+
// XMLPreferences holds the package-wide XML settings used when the expression
// operators construct an XML decoder. Every field is listed explicitly so the
// defaults are visible in one place.
var XMLPreferences = xmlPreferences{AttributePrefix: "+", ContentName: "+content", StrictMode: false, KeepNamespace: false, UseRawToken: false}
1313

1414
type xmlEncoder struct {
1515
attributePrefix string

pkg/yqlib/expression_tokeniser.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -420,9 +420,9 @@ func initLexer() (*lex.Lexer, error) {
420420

421421
lexer.Add([]byte(`load`), opTokenWithPrefs(loadOpType, nil, loadPrefs{loadAsString: false, decoder: NewYamlDecoder()}))
422422

423-
lexer.Add([]byte(`xmlload`), opTokenWithPrefs(loadOpType, nil, loadPrefs{loadAsString: false, decoder: NewXMLDecoder(XMLPreferences.AttributePrefix, XMLPreferences.ContentName, XMLPreferences.StrictMode)}))
424-
lexer.Add([]byte(`load_xml`), opTokenWithPrefs(loadOpType, nil, loadPrefs{loadAsString: false, decoder: NewXMLDecoder(XMLPreferences.AttributePrefix, XMLPreferences.ContentName, XMLPreferences.StrictMode)}))
425-
lexer.Add([]byte(`loadxml`), opTokenWithPrefs(loadOpType, nil, loadPrefs{loadAsString: false, decoder: NewXMLDecoder(XMLPreferences.AttributePrefix, XMLPreferences.ContentName, XMLPreferences.StrictMode)}))
423+
lexer.Add([]byte(`xmlload`), opTokenWithPrefs(loadOpType, nil, loadPrefs{loadAsString: false, decoder: NewXMLDecoder(XMLPreferences.AttributePrefix, XMLPreferences.ContentName, XMLPreferences.StrictMode, XMLPreferences.KeepNamespace, XMLPreferences.UseRawToken)}))
424+
lexer.Add([]byte(`load_xml`), opTokenWithPrefs(loadOpType, nil, loadPrefs{loadAsString: false, decoder: NewXMLDecoder(XMLPreferences.AttributePrefix, XMLPreferences.ContentName, XMLPreferences.StrictMode, XMLPreferences.KeepNamespace, XMLPreferences.UseRawToken)}))
425+
lexer.Add([]byte(`loadxml`), opTokenWithPrefs(loadOpType, nil, loadPrefs{loadAsString: false, decoder: NewXMLDecoder(XMLPreferences.AttributePrefix, XMLPreferences.ContentName, XMLPreferences.StrictMode, XMLPreferences.KeepNamespace, XMLPreferences.UseRawToken)}))
426426

427427
lexer.Add([]byte(`load_base64`), opTokenWithPrefs(loadOpType, nil, loadPrefs{loadAsString: false, decoder: NewBase64Decoder()}))
428428

pkg/yqlib/lib.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@ type xmlPreferences struct {
2626
AttributePrefix string
2727
ContentName string
2828
StrictMode bool
29+
KeepNamespace bool
30+
UseRawToken bool
2931
}
3032

3133
var log = logging.MustGetLogger("yq-lib")

pkg/yqlib/operator_encoder_decoder.go

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,12 @@ func decodeOperator(d *dataTreeNavigator, context Context, expressionNode *Expre
104104
case YamlInputFormat:
105105
decoder = NewYamlDecoder()
106106
case XMLInputFormat:
107-
decoder = NewXMLDecoder(XMLPreferences.AttributePrefix, XMLPreferences.ContentName, XMLPreferences.StrictMode)
107+
decoder = NewXMLDecoder(
108+
XMLPreferences.AttributePrefix,
109+
XMLPreferences.ContentName,
110+
XMLPreferences.StrictMode,
111+
XMLPreferences.KeepNamespace,
112+
XMLPreferences.UseRawToken)
108113
case Base64InputFormat:
109114
decoder = NewBase64Decoder()
110115
case PropertiesInputFormat:

0 commit comments

Comments
 (0)