Skip to content

Commit 9d934c9

Browse files
committed
refactor XML decoding to support charset handling
1 parent cf869d8 commit 9d934c9

File tree

3 files changed, with 16 additions and 5 deletions.

go.mod

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
 module github.com/aafeher/go-sitemap-parser
 
 go 1.21
+
+require golang.org/x/net v0.34.0
+
+require golang.org/x/text v0.21.0 // indirect

go.sum

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
+golang.org/x/net v0.34.0 h1:Mb7Mrk043xzHgnRM88suvJFwzVrRfHEHJEl5/71CKw0=
+golang.org/x/net v0.34.0/go.mod h1:di0qlW3YNM5oh6GqDGQr92MyTozJPmybPK4Ev/Gm31k=
+golang.org/x/text v0.21.0 h1:zyQAAkrwaneQ066sspRyJaG9VNi/YJ1NfzcGB3hZ/qo=
+golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ=

sitemap.go

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import (
 	"encoding/xml"
 	"errors"
 	"fmt"
+	"golang.org/x/net/html/charset"
 	"io"
 	"math/rand"
 	"net/http"
@@ -546,7 +547,10 @@ func (s *S) parseSitemapIndex(data string) (sitemapIndex, error) {
 		return smIndex, fmt.Errorf("sitemapindex is empty")
 	}
 
-	err := xml.Unmarshal([]byte(data), &smIndex)
+	decoder := xml.NewDecoder(bytes.NewReader([]byte(data)))
+	decoder.CharsetReader = charset.NewReaderLabel
+
+	err := decoder.Decode(&smIndex)
 	return smIndex, err
 
 }
@@ -562,11 +566,10 @@ func (s *S) parseURLSet(data string) (URLSet, error) {
 		return urlSet, fmt.Errorf("sitemap is empty")
 	}
 
-	err := xml.Unmarshal([]byte(data), &urlSet)
-	if err != nil {
-		return urlSet, err
-	}
+	decoder := xml.NewDecoder(bytes.NewReader([]byte(data)))
+	decoder.CharsetReader = charset.NewReaderLabel
 
+	err := decoder.Decode(&urlSet)
 	return urlSet, err
 }
 
0 commit comments

Comments
 (0)