1
1
import scanner
2
2
import stream
3
3
4
- def hexDigit(for: Int): Char = for match {
5
- case i and i >= 0 and i < 10 => ('0'.toInt + i).toChar
6
- case i and i >= 0 and i < 16 => ('A'.toInt + (i - 10)).toChar
7
- case _ => <>
8
- }
9
-
10
4
/// %-encodes the characters for which `shouldEncode` returns true.
11
5
/// Always %-encodes %.
12
6
def urlencode(s: String){ shouldEncode: Char => Bool }: String = collectString {
13
7
def encoded(c: Char): Unit = {
14
8
do emit('%')
15
9
val cc = c.toInt
16
- if (cc >= 256){ panic("Unicode not supported") } // TODO
10
+ if (cc >= 256 || cc < 0 ){ panic("Unicode not supported") } // TODO
17
11
do emit((cc / 16).hexDigit)
18
12
do emit(mod(cc, 16).hexDigit)
19
13
}
@@ -24,57 +18,61 @@ def urlencode(s: String){ shouldEncode: Char => Bool }: String = collectString {
24
18
}
25
19
}
26
20
27
- /// gen-delims as per RFC 3986
28
- def isGenDelim(c: Char): Bool = c match {
29
- case ':' => true
30
- case '/' => true
31
- case '?' => true
32
- case '#' => true
33
- case '[' => true
34
- case ']' => true
35
- case '@' => true
36
- case _ => false
37
- }
21
+ namespace urichars {
22
+ /// gen-delims as per RFC 3986
23
+ def isGenDelim(c: Char): Bool = c match {
24
+ case ':' => true
25
+ case '/' => true
26
+ case '?' => true
27
+ case '#' => true
28
+ case '[' => true
29
+ case ']' => true
30
+ case '@' => true
31
+ case _ => false
32
+ }
38
33
39
- /// sub-delims as per RFC 3986
40
- def isSubDelim(c: Char): Bool = c match {
41
- case '!' => true
42
- case '$' => true
43
- case '&' => true
44
- case '\'' => true
45
- case '(' => true
46
- case ')' => true
47
- case '*' => true
48
- case '+' => true
49
- case ',' => true
50
- case ';' => true
51
- case '=' => true
52
- case _ => false
34
+ /// sub-delims as per RFC 3986
35
+ def isSubDelim(c: Char): Bool = c match {
36
+ case '!' => true
37
+ case '$' => true
38
+ case '&' => true
39
+ case '\'' => true
40
+ case '(' => true
41
+ case ')' => true
42
+ case '*' => true
43
+ case '+' => true
44
+ case ',' => true
45
+ case ';' => true
46
+ case '=' => true
47
+ case _ => false
48
+ }
49
+
50
+ /// Unreserved characters as per RFC 3986
51
+ def isUnreserved(c: Char): Bool = c match {
52
+ case c and c.isAlphanumeric => true
53
+ case '-' => true
54
+ case '.' => true
55
+ case '_' => true
56
+ case '~' => true
57
+ case _ => false
58
+ }
53
59
}
54
60
55
- /// Encodes the string for urls using %-escapes
61
+ /// Encodes the string for urls using %-escapes,
62
+ /// escaping only url delimiters and space characters
56
63
def urlencodePermissive(s: String): String = urlencode(s){
57
64
case '%' => true
58
65
case ' ' => true
59
- case c and c.isGenDelim || c.isSubDelim => true
66
+ case c and c.urichars:: isGenDelim || c.urichars:: isSubDelim => true
60
67
case _ => false
61
68
}
62
69
63
- /// Unreserved characters as per RFC 3986
64
- def isUnreserved(c: Char): Bool = c match {
65
- case c and c.isAlphanumeric => true
66
- case '-' => true
67
- case '.' => true
68
- case '_' => true
69
- case '~' => true
70
- case _ => false
71
- }
72
70
73
71
/// Encodes the string for urls using %-escapes,
74
72
/// escaping everything that is not an unreserved character
75
73
/// as per RFC 3986.
76
74
def urlencode(s: String): String =
77
- urlencode(s){ c => not(c.isUnreserved) }
75
+ urlencode(s){ c => not(c.urichars:: isUnreserved) }
78
76
79
77
80
78
/// Decodes %-escapes in the given string
@@ -93,112 +91,113 @@ def urldecode(s: String): String = collectString {
93
91
}
94
92
}
95
93
94
+ /// Builder style representation of the parts of a URI.
95
+ ///
96
+ /// Many consumers expect the operations to be called in-order.
96
97
interface URIBuilder {
98
+ /// URI scheme, e.g. http, https, ftp, ...
97
99
def scheme(s: String): Unit
100
+ /// Userinfo part, used e.g. for basic auth, such as user:letmein
98
101
def userinfo(a: String): Unit
102
+ /// Hostname (or non-host authority part), e.g. effekt-lang.org, [::1], 127.0.0.1, ...
99
103
def host(h: String): Unit
104
+ /// Port, e.g. 80, 443, ...
100
105
def port(p: Int): Unit
106
+ /// Path-part of the URI, commonly something like /index.html
101
107
def path(p: String): Unit
108
+ /// Query part of the URI, e.g. q=12 for ...?q=12
102
109
def query(q: String): Unit
110
+ /// Fragment part of the URI, e.g. a1 for ...#a1
103
111
def fragment(f: String): Unit
104
112
}
105
113
106
- def parseScheme(): String / { Scan[Char], stop } = {
107
- with collectString
108
- do emit(readIf{ c => c.isAlphabetic })
109
- readWhile{ c => c.isAlphanumeric || c == '+' || c == '-' || c == '.' }
110
- }
111
-
112
- def unread[A, R](c: A){ body: => R / Scan[A] }: R / Scan[A] = {
113
- var read = false
114
- try body() with Scan[A] {
115
- def peek() = if(read) { resume{do peek()} } else { resume{ () => c } }
116
- def skip() = if(read) { resume{do skip[A]()} } else { resume{read = true} }
114
+ namespace internal {
115
+ def parseScheme(): String / { Scan[Char], stop } = {
116
+ with collectString
117
+ do emit(readIf{ c => c.isAlphabetic })
118
+ readWhile{ c => c.isAlphanumeric || c == '+' || c == '-' || c == '.' }
117
119
}
118
- }
119
- def unread[R](s: String){ body: => R / Scan[Char] }: R / Scan[Char] = {
120
- var pos = 0
121
- try body() with Scan[Char] {
122
- def peek() = if (pos < s.length) { resume{s.unsafeCharAt(pos)} } else { resume{do peek()} }
123
- def skip() = if (pos < s.length) { resume{pos = pos + 1} } else { resume{do skip[Char]()} }
124
- }
125
- }
126
120
127
- def parseHostAndPort(): Unit / { URIBuilder, Scan[Char] } = {
128
- try {
129
- do peek[Char]() match {
130
- case '[' => // IP-literal
131
- // this is more permissive than the spec
132
- do host(collectString{ readWhile{ c => c != ']' } } ++ "]")
133
- readIf(']')
134
- case _ =>
135
- do host(collectString{ readWhile{
136
- case '%' => true
137
- case c and c.isUnreserved => true
138
- case c and c.isSubDelim => true
139
- case _ => false
140
- } })
121
+ def parseHostAndPort(): Unit / { URIBuilder, Scan[Char] } = {
122
+ try {
123
+ do peek[Char]() match {
124
+ case '[' => // IP-literal
125
+ // this is more permissive than the spec
126
+ do host(collectString{ readWhile{ c => c != ']' } } ++ "]")
127
+ readIf(']')
128
+ case _ =>
129
+ do host(collectString{ readWhile{
130
+ case '%' => true
131
+ case c and c.urichars::isUnreserved => true
132
+ case c and c.urichars::isSubDelim => true
133
+ case _ => false
134
+ } })
135
+ }
136
+ } with stop { () =>
137
+ do host("")
138
+ }
139
+ attempt{
140
+ readIf(':')
141
+ do port(readInteger())
142
+ }{
143
+ // no port
144
+ ()
141
145
}
142
- } with stop { () =>
143
- do host("")
144
- }
145
- attempt{
146
- readIf(':')
147
- do port(readInteger())
148
- }{
149
- // no port
150
- ()
151
146
}
152
- }
153
147
154
- def parseAuthority(): Unit / { URIBuilder, Scan[Char] } = {
155
- // try parsing as userinfo@...
156
- val fst = collectString{ readWhile{
157
- case '%' => true
158
- case ':' => true
159
- case c and c.isUnreserved => true
160
- case c and c.isSubDelim => true
161
- case _ => false
162
- } }
163
- attempt{ // was userinfo
164
- readIf('@')
165
- do userinfo(fst)
166
- parseHostAndPort()
167
- }{ // was not userinfo
168
- with unread(fst)
169
- parseHostAndPort()
148
+ def parseAuthority(): Unit / { URIBuilder, Scan[Char] } = {
149
+ // try parsing as userinfo@...
150
+ val fst = collectString{ readWhile{
151
+ case '%' => true
152
+ case ':' => true
153
+ case c and c.urichars::isUnreserved => true
154
+ case c and c.urichars::isSubDelim => true
155
+ case _ => false
156
+ } }
157
+ attempt{ // was userinfo
158
+ readIf('@')
159
+ do userinfo(fst)
160
+ parseHostAndPort()
161
+ }{ // was not userinfo
162
+ with unread(fst)
163
+ parseHostAndPort()
164
+ }
170
165
}
171
- }
172
166
173
- def parsePathQueryFragment(): Unit / { URIBuilder, Scan[Char], Exception[WrongFormat] } = {
174
- do path(collectString{ readWhile{
175
- case '?' => false
176
- case '#' => false
177
- case _ => true
178
- }})
179
- boundary{
180
- readIf('?')
181
- do query(collectString{ readWhile{ c => c != '#' }})
182
- }
183
- boundary{
184
- readIf('#')
185
- do fragment(collectString{ readWhile[Char]{ c => true } })
167
+ def parsePathQueryFragment(): Unit / { URIBuilder, Scan[Char], Exception[WrongFormat] } = {
168
+ do path(collectString{ readWhile{
169
+ case '?' => false
170
+ case '#' => false
171
+ case _ => true
172
+ }})
173
+ boundary{
174
+ readIf('?')
175
+ do query(collectString{ readWhile{ c => c != '#' }})
176
+ }
177
+ boundary{
178
+ readIf('#')
179
+ do fragment(collectString{ readWhile[Char]{ c => true } })
180
+ }
186
181
}
187
182
}
188
183
184
+ /// Parse a (non-relative) URI into its parts, causing the respective URIBuilder events.
185
+ /// Should at least parse all RFC3986-compliant URIs.
186
+ ///
187
+ /// The authority is reported via `host` even when it is not actually a host.
189
188
def parseURI(uri: String): Unit / { URIBuilder, Exception[WrongFormat] } = {
190
189
try {
191
190
with feed(uri)
192
191
with scanner[Char]
193
192
194
- do scheme(parseScheme())
193
+ do scheme(internal:: parseScheme())
195
194
readIf(':')
196
195
197
196
val c = read[Char]()
198
197
if (c == '/' and do peek[Char]() == '/'){
199
198
// starts with `//`
200
199
readIf('/')
201
- parseAuthority()
200
+ internal:: parseAuthority()
202
201
boundary{
203
202
do peek[Char]() match {
204
203
case '?' => ()
@@ -207,10 +206,10 @@ def parseURI(uri: String): Unit / { URIBuilder, Exception[WrongFormat] } = {
207
206
case _ => do raise(WrongFormat(), "Path must be empty or start with / if there is an authority component.")
208
207
}
209
208
}
210
- parsePathQueryFragment()
209
+ internal:: parsePathQueryFragment()
211
210
} else {
212
211
with unread(c)
213
- parsePathQueryFragment()
212
+ internal:: parsePathQueryFragment()
214
213
}
215
214
} with stop { () =>
216
215
do raise(WrongFormat(), "Could not parse URI")
0 commit comments