1
+ import scanner
2
+ import stream
3
+
4
/// Converts a value in the range 0..15 to its uppercase hexadecimal digit
/// ('0'..'9', 'A'..'F').
///
/// Panics with a descriptive message for any value outside that range.
def hexDigit(for: Int): Char = for match {
  case i and i >= 0 and i < 10 => ('0'.toInt + i).toChar
  case i and i >= 10 and i < 16 => ('A'.toInt + (i - 10)).toChar
  // Previously an unimplemented hole; fail with a message that names the bad input.
  case _ => panic("hexDigit: value out of range 0..15: " ++ for.show)
}
9
+
10
/// Percent-encodes every character of `s` for which `shouldEncode` returns true.
/// The percent sign itself is always encoded, without consulting `shouldEncode`.
///
/// An encoded character is written as `%` followed by two hex digits produced
/// by `hexDigit`. Panics with "Unicode not supported" for characters whose
/// code is 256 or larger.
def urlencode(s: String){ shouldEncode: Char => Bool }: String = collectString {
  // Emits `%XY` where X and Y are the high and low hex digit of `c`'s code.
  def escape(c: Char): Unit = {
    do emit('%')
    val code = c.toInt
    if (code >= 256) { panic("Unicode not supported") } // TODO
    do emit(hexDigit(code / 16))
    do emit(hexDigit(mod(code, 16)))
  }
  for[Char] { s.each } { c =>
    if (c == '%') {
      escape(c)
    } else if (shouldEncode(c)) {
      escape(c)
    } else {
      do emit(c)
    }
  }
}
26
+
27
/// Tests whether `c` is one of the gen-delims of RFC 3986:
/// `:`, `/`, `?`, `#`, `[`, `]` or `@`.
def isGenDelim(c: Char): Bool =
  c == ':' || c == '/' || c == '?' || c == '#' || c == '[' || c == ']' || c == '@'
38
+
39
/// Tests whether `c` is one of the sub-delims of RFC 3986:
/// `!`, `$`, `&`, `'`, `(`, `)`, `*`, `+`, `,`, `;` or `=`.
def isSubDelim(c: Char): Bool =
  c == '!' || c == '$' || c == '&' || c == '\'' || c == '(' || c == ')' ||
  c == '*' || c == '+' || c == ',' || c == ';' || c == '='
54
+
55
/// Percent-encodes `s` for use in a URL: the percent sign, the space and all
/// gen-delims and sub-delims (see `isGenDelim`, `isSubDelim`) are escaped;
/// every other character is copied unchanged.
def urlencode(s: String): String =
  urlencode(s) { c => c == '%' || c == ' ' || c.isGenDelim || c.isSubDelim }
62
+
63
/// Tests whether `c` is an unreserved character of RFC 3986:
/// alphanumeric, or one of `-`, `.`, `_`, `~`.
def isUnreserved(c: Char): Bool =
  c.isAlphanumeric || c == '-' || c == '.' || c == '_' || c == '~'
72
+
73
/// Percent-encodes `s` strictly: every character that is not an unreserved
/// character as per RFC 3986 (see `isUnreserved`) is escaped.
def urlencodeStrict(s: String): String =
  urlencode(s) { ch => not(isUnreserved(ch)) }
78
+
79
+
80
/// Decodes the %-escapes in `s`.
///
/// Each `%` must be followed by two hex digits; the pair is replaced by the
/// character with that code. All other characters are copied unchanged.
def urldecode(s: String): String = collectString {
  with feed(s)
  with scanner[Char]

  exhaustively {
    val next = do read[Char]()
    if (next == '%') {
      // high and low nibble of the escaped character code
      val high = readHexDigit()
      val low = readHexDigit()
      do emit((high * 16 + low).toChar)
    } else {
      do emit(next)
    }
  }
}
95
+
96
/// Callbacks through which the URI parsers in this file report the components
/// they recognise. Each operation receives the raw text (or number) of one
/// component.
interface URIBuilder {
  /// The scheme, as read by `parseScheme`.
  def scheme(s: String): Unit

  /// The userinfo part of the authority (the text before `@`).
  def userinfo(a: String): Unit

  /// The host part of the authority.
  def host(h: String): Unit

  /// The port number following the host.
  def port(p: Int): Unit

  /// The path component.
  def path(p: String): Unit

  /// The query component (the text after `?`).
  def query(q: String): Unit

  /// The fragment component (the text after `#`).
  def fragment(f: String): Unit
}
105
+
106
/// Reads a URI scheme from the scanner and returns it as a string.
///
/// The first character must be alphabetic (otherwise `readIf` fails via `stop`);
/// it is followed by any number of alphanumeric characters, `+`, `-` or `.`.
def parseScheme(): String / { Scan[Char], stop } = collectString {
  val first = readIf { c => isAlphabetic(c) }
  do emit(first)
  readWhile {
    case '+' => true
    case '-' => true
    case '.' => true
    case c => c.isAlphanumeric
  }
}
111
+
112
/// Runs `body` with the single element `c` pushed back in front of the
/// surrounding scanner.
///
/// Until the first `skip`, `peek` answers with `c`; that first `skip` consumes
/// `c`. Afterwards both operations are forwarded to the outer `Scan[A]`.
def unread[A, R](c: A){ body: => R / Scan[A] }: R / Scan[A] = {
  // becomes true once the pushed-back element has been skipped
  var consumed = false
  try { body() } with Scan[A] {
    def peek() =
      if (not(consumed)) { resume { () => c } }
      else { resume { do peek() } }
    def skip() =
      if (not(consumed)) { resume { consumed = true } }
      else { resume { do skip[A]() } }
  }
}
119
/// Runs `body` with all characters of `s` pushed back in front of the
/// surrounding scanner.
///
/// `peek` and `skip` are served from `s` until it is used up; afterwards both
/// operations are forwarded to the outer `Scan[Char]`.
def unread[R](s: String){ body: => R / Scan[Char] }: R / Scan[Char] = {
  // index of the next pushed-back character that has not been skipped yet
  var offset = 0
  try { body() } with Scan[Char] {
    def peek() =
      if (offset >= s.length) { resume { do peek() } }
      else { resume { s.unsafeCharAt(offset) } }
    def skip() =
      if (offset >= s.length) { resume { do skip[Char]() } }
      else { resume { offset = offset + 1 } }
  }
}
126
+
127
/// Parses `host [":" port]` and reports the parts via `URIBuilder`.
///
/// A host starting with `[` is treated as an IP-literal and read up to the
/// closing `]`. Any other host consists of `%`, unreserved characters and
/// sub-delims. If `stop` is raised while reading the host (e.g. nothing is
/// left to peek at), an empty host is reported.
/// A port is reported only if `:` followed by an integer can be read.
def parseHostAndPort(): Unit / { URIBuilder, Scan[Char] } = {
  try {
    if (do peek[Char]() == '[') {
      // IP-literal; this is more permissive than the spec.
      // The opening '[' is not skipped beforehand, so it is part of `literal`.
      val literal = collectString { readWhile { c => c != ']' } }
      do host(literal ++ "]")
      readIf(']')
    } else {
      val name = collectString {
        readWhile { c => c == '%' || c.isUnreserved || c.isSubDelim }
      }
      do host(name)
    }
  } with stop { () =>
    do host("")
  }
  attempt {
    readIf(':')
    do port(readInteger())
  } {
    // no port
    ()
  }
}
153
+
154
/// Parses the authority component `[userinfo "@"] host [":" port]` and
/// reports the parts via `URIBuilder`.
def parseAuthority(): Unit / { URIBuilder, Scan[Char] } = {
  // Speculatively read what may turn out to be userinfo; ':' is accepted here
  // in addition to '%', unreserved characters and sub-delims.
  val candidate = collectString {
    readWhile { c => c == '%' || c == ':' || c.isUnreserved || c.isSubDelim }
  }
  attempt {
    // followed by '@': the candidate was userinfo
    readIf('@')
    do userinfo(candidate)
    parseHostAndPort()
  } {
    // not userinfo: push the text back and parse it as host and port
    with unread(candidate)
    parseHostAndPort()
  }
}
172
+
173
/// Parses `path ["?" query] ["#" fragment]` and reports the parts via
/// `URIBuilder`.
///
/// The path (possibly empty) is always reported. Query and fragment are only
/// reported if their introducing `?` resp. `#` can be read.
def parsePathQueryFragment(): Unit / { URIBuilder, Scan[Char], Exception[WrongFormat] } = {
  // the path extends up to the first '?' or '#'
  val pathText = collectString { readWhile { c => not(c == '?' || c == '#') } }
  do path(pathText)
  boundary {
    readIf('?')
    // the query extends up to the first '#'
    val queryText = collectString { readWhile { c => c != '#' } }
    do query(queryText)
  }
  boundary {
    readIf('#')
    // the fragment is everything that remains
    val fragmentText = collectString { readWhile[Char] { c => true } }
    do fragment(fragmentText)
  }
}
188
+
189
/// Parses `uri` and reports its components through `URIBuilder`, in the order
/// in which they are read: scheme, then (if the scheme is followed by `//`)
/// the authority parts, then path, query and fragment.
///
/// Raises `WrongFormat` with "Could not parse URI" if `stop` escapes from the
/// parsing steps, and with a more specific message if an authority is
/// followed by a character other than `/`, `?` or `#`.
def parseURI(uri: String): Unit / { URIBuilder, Exception[WrongFormat] } = {
  try {
    with feed(uri)
    with scanner[Char]

    val name = parseScheme()
    do scheme(name)
    readIf(':')

    val first = read[Char]()
    if (first == '/' and do peek[Char]() == '/') {
      // starts with `//`: an authority component follows
      readIf('/')
      parseAuthority()
      // If nothing is left to peek at, `boundary` ends this check silently.
      boundary {
        do peek[Char]() match {
          case '/' => ()
          case '?' => ()
          case '#' => ()
          case _ => do raise(WrongFormat(), "Path must be empty or start with / if there is an authority component.")
        }
      }
      parsePathQueryFragment()
    } else {
      // `first` already belongs to the path: push it back before parsing on
      with unread(first)
      parsePathQueryFragment()
    }
  } with stop { () =>
    do raise(WrongFormat(), "Could not parse URI")
  }
}
219
+
220
namespace example {
  /// Prints `uri`, then one line for every component that `parseURI` reports.
  /// A `WrongFormat` exception is turned into a panic.
  def report(uri: String): Unit = {
    with on[WrongFormat].panic
    println(uri)
    try parseURI(uri) with URIBuilder {
      def scheme(s) = resume(println(" Scheme: " ++ s))
      def userinfo(u) = resume(println(" Userinfo: " ++ u))
      def host(h) = resume(println(" Host: " ++ h))
      def path(p) = resume(println(" Path: " ++ p))
      def port(p) = resume(println(" Port: " ++ p.show))
      def query(q) = resume(println(" Query: " ++ q))
      def fragment(q) = resume(println(" Fragment: " ++ q))
    }
  }

  /// Demonstrates encoding/decoding round trips and parses the example URIs
  /// listed in RFC 3986.
  def main() = {
    println(urldecode("%2F%20%20!"))
    println(urldecode(urlencode("Hallo Welt!/&^$@^*(&$)(*!_!_+\")")))
    println(urlencode("Hallo Welt!/&^$@^*(&$)(*!_!_+\")"))
    println(urldecode(urlencodeStrict("Hallo Welt!/&^$@^*(&$)(*!_!_+\")")))
    println(urlencodeStrict("Hallo Welt!/&^$@^*(&$)(*!_!_+\")"))

    // examples from the spec
    report("ftp://ftp.is.co.za/rfc/rfc1808.txt")
    report("http://www.ietf.org/rfc/rfc2396.txt")
    report("ldap://[2001:db8::7]/c=GB?objectClass?one")
    // Restored from the spec: this literal had been split across two lines
    // and its address replaced by an e-mail obfuscation placeholder.
    report("mailto:John.Doe@example.com")
    report("news:comp.infosystems.www.servers.unix")
    report("tel:+1-816-555-1212")
    report("telnet://192.0.2.16:80/")
    report("urn:oasis:names:specification:docbook:dtd:xml:4.1")
  }
}
0 commit comments