Skip to content

Commit a51a35a

Browse files
Steven Scott authored and
garyburd committed
Improve header parsing code
Because the net/http server removes \r\n from multi-line header values, there's no need to check for \r or \n when skipping whitespace in headers (see https://godoc.org/net/textproto#Reader.ReadMIMEHeader). Given this fact, the whitespace test can be simplified to b == ' ' || b == '\t'. There's no need for the isSpaceOctet bit field in octetTypes. The isTokenOctet bit field is the only bit field remaining after the removal of isSpaceOctet. Simplify the code by replacing the isTokenOctet bit test in octetTypes with an array of booleans called isTokenOctet. Declare isTokenOctet as a composite literal instead of constructing it at runtime. Add documentation to core functions for parsing HTTP headers.
1 parent 3130e8d commit a51a35a

File tree

2 files changed

+90
-43
lines changed

2 files changed

+90
-43
lines changed

util.go

Lines changed: 89 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -31,68 +31,113 @@ func generateChallengeKey() (string, error) {
3131
return base64.StdEncoding.EncodeToString(p), nil
3232
}
3333

34-
// Octet types from RFC 2616.
35-
var octetTypes [256]byte
36-
37-
const (
38-
isTokenOctet = 1 << iota
39-
isSpaceOctet
40-
)
41-
42-
func init() {
43-
// From RFC 2616
44-
//
45-
// OCTET = <any 8-bit sequence of data>
46-
// CHAR = <any US-ASCII character (octets 0 - 127)>
47-
// CTL = <any US-ASCII control character (octets 0 - 31) and DEL (127)>
48-
// CR = <US-ASCII CR, carriage return (13)>
49-
// LF = <US-ASCII LF, linefeed (10)>
50-
// SP = <US-ASCII SP, space (32)>
51-
// HT = <US-ASCII HT, horizontal-tab (9)>
52-
// <"> = <US-ASCII double-quote mark (34)>
53-
// CRLF = CR LF
54-
// LWS = [CRLF] 1*( SP | HT )
55-
// TEXT = <any OCTET except CTLs, but including LWS>
56-
// separators = "(" | ")" | "<" | ">" | "@" | "," | ";" | ":" | "\" | <">
57-
// | "/" | "[" | "]" | "?" | "=" | "{" | "}" | SP | HT
58-
// token = 1*<any CHAR except CTLs or separators>
59-
// qdtext = <any TEXT except <">>
60-
61-
for c := 0; c < 256; c++ {
62-
var t byte
63-
isCtl := c <= 31 || c == 127
64-
isChar := 0 <= c && c <= 127
65-
isSeparator := strings.IndexRune(" \t\"(),/:;<=>?@[]\\{}", rune(c)) >= 0
66-
if strings.IndexRune(" \t\r\n", rune(c)) >= 0 {
67-
t |= isSpaceOctet
68-
}
69-
if isChar && !isCtl && !isSeparator {
70-
t |= isTokenOctet
71-
}
72-
octetTypes[c] = t
73-
}
34+
// Token octets per RFC 2616.
35+
var isTokenOctet = [256]bool{
36+
'!': true,
37+
'#': true,
38+
'$': true,
39+
'%': true,
40+
'&': true,
41+
'\'': true,
42+
'*': true,
43+
'+': true,
44+
'-': true,
45+
'.': true,
46+
'0': true,
47+
'1': true,
48+
'2': true,
49+
'3': true,
50+
'4': true,
51+
'5': true,
52+
'6': true,
53+
'7': true,
54+
'8': true,
55+
'9': true,
56+
'A': true,
57+
'B': true,
58+
'C': true,
59+
'D': true,
60+
'E': true,
61+
'F': true,
62+
'G': true,
63+
'H': true,
64+
'I': true,
65+
'J': true,
66+
'K': true,
67+
'L': true,
68+
'M': true,
69+
'N': true,
70+
'O': true,
71+
'P': true,
72+
'Q': true,
73+
'R': true,
74+
'S': true,
75+
'T': true,
76+
'U': true,
77+
'W': true,
78+
'V': true,
79+
'X': true,
80+
'Y': true,
81+
'Z': true,
82+
'^': true,
83+
'_': true,
84+
'`': true,
85+
'a': true,
86+
'b': true,
87+
'c': true,
88+
'd': true,
89+
'e': true,
90+
'f': true,
91+
'g': true,
92+
'h': true,
93+
'i': true,
94+
'j': true,
95+
'k': true,
96+
'l': true,
97+
'm': true,
98+
'n': true,
99+
'o': true,
100+
'p': true,
101+
'q': true,
102+
'r': true,
103+
's': true,
104+
't': true,
105+
'u': true,
106+
'v': true,
107+
'w': true,
108+
'x': true,
109+
'y': true,
110+
'z': true,
111+
'|': true,
112+
'~': true,
74113
}
75114

115+
// skipSpace returns a slice of the string s with all leading RFC 2616 linear
116+
// whitespace removed.
76117
func skipSpace(s string) (rest string) {
77118
i := 0
78119
for ; i < len(s); i++ {
79-
if octetTypes[s[i]]&isSpaceOctet == 0 {
120+
if b := s[i]; b != ' ' && b != '\t' {
80121
break
81122
}
82123
}
83124
return s[i:]
84125
}
85126

127+
// nextToken returns the leading RFC 2616 token of s and the string following
128+
// the token.
86129
func nextToken(s string) (token, rest string) {
87130
i := 0
88131
for ; i < len(s); i++ {
89-
if octetTypes[s[i]]&isTokenOctet == 0 {
132+
if !isTokenOctet[s[i]] {
90133
break
91134
}
92135
}
93136
return s[:i], s[i:]
94137
}
95138

139+
// nextTokenOrQuoted returns the leading token or quoted string per RFC 2616
140+
// and the string following the token or quoted string.
96141
func nextTokenOrQuoted(s string) (value string, rest string) {
97142
if !strings.HasPrefix(s, "\"") {
98143
return nextToken(s)
@@ -128,7 +173,8 @@ func nextTokenOrQuoted(s string) (value string, rest string) {
128173
return "", ""
129174
}
130175

131-
// equalASCIIFold returns true if s is equal to t with ASCII case folding.
176+
// equalASCIIFold returns true if s is equal to t with ASCII case folding as
177+
// defined in RFC 4790.
132178
func equalASCIIFold(s, t string) bool {
133179
for s != "" && t != "" {
134180
sr, size := utf8.DecodeRuneInString(s)

util_test.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ var equalASCIIFoldTests = []struct {
1717
{"WebSocket", "websocket", true},
1818
{"websocket", "WebSocket", true},
1919
{"Öyster", "öyster", false},
20+
{"WebSocket", "WetSocket", false},
2021
}
2122

2223
func TestEqualASCIIFold(t *testing.T) {

0 commit comments

Comments
 (0)