Skip to content

Commit 62431c4

Browse files
author
Mario Hros
committed
Do not lowercase the href attribute of <a> tag links
1 parent 89ca77f commit 62431c4

File tree

2 files changed

+14
-12
lines changed

2 files changed

+14
-12
lines changed

html2text.go

Lines changed: 13 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -3,8 +3,8 @@ package html2text
33
import (
44
"bytes"
55
"regexp"
6-
"strings"
76
"strconv"
7+
"strings"
88
)
99

1010
const (
@@ -123,7 +123,7 @@ func HTML2Text(html string) string {
123123
switch {
124124
// skip new lines and spaces adding a single space if not there yet
125125
case r <= 0xD, r == 0x85, r == 0x2028, r == 0x2029, // new lines
126-
r == ' ', r >= 0x2008 && r <= 0x200B: // spaces
126+
r == ' ', r >= 0x2008 && r <= 0x200B: // spaces
127127
writeSpace(outBuf)
128128
continue
129129

@@ -168,31 +168,32 @@ func HTML2Text(html string) string {
168168

169169
case r == '>': // end of a tag
170170
shouldOutput = true
171-
tagName := strings.ToLower(html[tagStart:i])
171+
tag := html[tagStart:i]
172+
tagNameLowercase := strings.ToLower(tag)
172173

173-
if tagName == "/ul" {
174+
if tagNameLowercase == "/ul" {
174175
outBuf.WriteString(lbr)
175-
} else if tagName == "li" || tagName == "li/" {
176+
} else if tagNameLowercase == "li" || tagNameLowercase == "li/" {
176177
outBuf.WriteString(lbr)
177-
} else if headersRE.MatchString(tagName) {
178+
} else if headersRE.MatchString(tagNameLowercase) {
178179
if canPrintNewline {
179180
outBuf.WriteString(lbr + lbr)
180181
}
181182
canPrintNewline = false
182-
} else if tagName == "br" || tagName == "br/" {
183+
} else if tagNameLowercase == "br" || tagNameLowercase == "br/" {
183184
// new line
184185
outBuf.WriteString(lbr)
185-
} else if tagName == "p" || tagName == "/p" {
186+
} else if tagNameLowercase == "p" || tagNameLowercase == "/p" {
186187
if canPrintNewline {
187188
outBuf.WriteString(lbr + lbr)
188189
}
189190
canPrintNewline = false
190-
} else if badTagnamesRE.MatchString(tagName) {
191+
} else if badTagnamesRE.MatchString(tagNameLowercase) {
191192
// unwanted block
192193
badTagStackDepth++
193194

194195
// parse link href
195-
m := linkTagRE.FindStringSubmatch(tagName)
196+
m := linkTagRE.FindStringSubmatch(tag)
196197
if len(m) == 4 {
197198
link := m[2]
198199
if len(link) == 0 {
@@ -203,8 +204,8 @@ func HTML2Text(html string) string {
203204
outBuf.WriteString(HTMLEntitiesToText(link))
204205
}
205206
}
206-
} else if len(tagName) > 0 && tagName[0] == '/' &&
207-
badTagnamesRE.MatchString(tagName[1:]) {
207+
} else if len(tagNameLowercase) > 0 && tagNameLowercase[0] == '/' &&
208+
badTagnamesRE.MatchString(tagNameLowercase[1:]) {
208209
// end of unwanted block
209210
badTagStackDepth--
210211
}

html2text_test.go

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,7 @@ func TestHTML2Text(t *testing.T) {
1616
So(HTML2Text(`click <a class="x" href="test">here</a>`), ShouldEqual, "click test")
1717
So(HTML2Text(`click <a href="ents/&apos;x&apos;">here</a>`), ShouldEqual, "click ents/'x'")
1818
So(HTML2Text(`click <a href="javascript:void(0)">here</a>`), ShouldEqual, "click ")
19+
So(HTML2Text(`click <a href="http://bit.ly/2n4wXRs">news</a>`), ShouldEqual, "click http://bit.ly/2n4wXRs")
1920
})
2021

2122
Convey("Inlines", func() {

0 commit comments

Comments
 (0)