Skip to content

Commit 7942a5f

Browse files
Cleanup URI implementations, move parts into proper stdlibs
1 parent 41d0624 commit 7942a5f

File tree

3 files changed

+148
-121
lines changed

3 files changed

+148
-121
lines changed

libraries/common/char.effekt

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,15 @@ def digitValue(char: Char, base: Int): Int / Exception[WrongFormat] = {
5252
digit
5353
}
5454

55+
/// Encodes a number in the range 0..15 as an uppercase hex digit ('0'-'9', 'A'-'F').
56+
///
57+
/// Panics on all other numbers.
58+
def hexDigit(for: Int): Char = for match {
59+
case i and i >= 0 and i < 10 => ('0'.toInt + i).toChar
60+
case i and i >= 0 and i < 16 => ('A'.toInt + (i - 10)).toChar // 0..9 matched above, so this arm is 10..15
61+
case _ => panic(for.show ++ " is not in [0,16).")
62+
}
63+
5564
/// Checks if the given character is an ASCII digit in base 10
5665
/// Prefer using `digitValue(c: Char)` to get the numeric value out.
5766
def isDigit(char: Char): Bool = result[Int, WrongFormat] { digitValue(char) }.isSuccess

libraries/common/io/uri.effekt

Lines changed: 120 additions & 121 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,13 @@
11
import scanner
22
import stream
33

4-
def hexDigit(for: Int): Char = for match {
5-
case i and i >= 0 and i < 10 => ('0'.toInt + i).toChar
6-
case i and i >= 0 and i < 16 => ('A'.toInt + (i - 10)).toChar
7-
case _ => <>
8-
}
9-
104
/// %-encodes the characters for which `shouldEncode` returns true.
115
/// Always %-encodes %.
126
def urlencode(s: String){ shouldEncode: Char => Bool }: String = collectString {
137
def encoded(c: Char): Unit = {
148
do emit('%')
159
val cc = c.toInt
16-
if (cc >= 256){ panic("Unicode not supported") } // TODO
10+
if (cc >= 256 || cc < 0){ panic("Unicode not supported") } // TODO
1711
do emit((cc / 16).hexDigit)
1812
do emit(mod(cc, 16).hexDigit)
1913
}
@@ -24,57 +18,61 @@ def urlencode(s: String){ shouldEncode: Char => Bool }: String = collectString {
2418
}
2519
}
2620

27-
/// gen-delims as per RFC 3986
28-
def isGenDelim(c: Char): Bool = c match {
29-
case ':' => true
30-
case '/' => true
31-
case '?' => true
32-
case '#' => true
33-
case '[' => true
34-
case ']' => true
35-
case '@' => true
36-
case _ => false
37-
}
21+
namespace urichars {
22+
/// Returns true iff `c` is one of the gen-delims of RFC 3986: `:` `/` `?` `#` `[` `]` `@`.
23+
def isGenDelim(c: Char): Bool = c match {
24+
case ':' => true
25+
case '/' => true
26+
case '?' => true
27+
case '#' => true
28+
case '[' => true
29+
case ']' => true
30+
case '@' => true
31+
case _ => false
32+
}
3833

39-
/// sub-delims as per RFC 3986
40-
def isSubDelim(c: Char): Bool = c match {
41-
case '!' => true
42-
case '$' => true
43-
case '&' => true
44-
case '\'' => true
45-
case '(' => true
46-
case ')' => true
47-
case '*' => true
48-
case '+' => true
49-
case ',' => true
50-
case ';' => true
51-
case '=' => true
52-
case _ => false
34+
/// Returns true iff `c` is one of the sub-delims of RFC 3986:
/// `!` `$` `&` `'` `(` `)` `*` `+` `,` `;` `=`.
35+
def isSubDelim(c: Char): Bool = c match {
36+
case '!' => true
37+
case '$' => true
38+
case '&' => true
39+
case '\'' => true
40+
case '(' => true
41+
case ')' => true
42+
case '*' => true
43+
case '+' => true
44+
case ',' => true
45+
case ';' => true
46+
case '=' => true
47+
case _ => false
48+
}
49+
50+
/// Returns true iff `c` is an unreserved character as per RFC 3986:
/// alphanumeric (as decided by `isAlphanumeric`) or one of `-` `.` `_` `~`.
/// NOTE(review): RFC 3986 means ASCII letters and digits only; confirm `isAlphanumeric` is ASCII-only.
51+
def isUnreserved(c: Char): Bool = c match {
52+
case c and c.isAlphanumeric => true
53+
case '-' => true
54+
case '.' => true
55+
case '_' => true
56+
case '~' => true
57+
case _ => false
58+
}
5359
}
5460

55-
/// Encodes the string for urls using %-escapes
61+
/// Encodes the string for urls using %-escapes,
62+
/// escaping only `%`, spaces, and the RFC 3986 gen-delims and sub-delims
5663
def urlencodePermissive(s: String): String = urlencode(s){
5764
case '%' => true
5865
case ' ' => true
59-
case c and c.isGenDelim || c.isSubDelim => true
66+
case c and c.urichars::isGenDelim || c.urichars::isSubDelim => true
6067
case _ => false
6168
}
6269

63-
/// Unreserved characters as per RFC 3986
64-
def isUnreserved(c: Char): Bool = c match {
65-
case c and c.isAlphanumeric => true
66-
case '-' => true
67-
case '.' => true
68-
case '_' => true
69-
case '~' => true
70-
case _ => false
71-
}
7270

7371
/// Encodes the string for urls using %-escapes,
7472
/// escaping everything that is not an unreserved character
7573
/// as per RFC 3986.
7674
def urlencode(s: String): String =
77-
urlencode(s){ c => not(c.isUnreserved) }
75+
urlencode(s){ c => not(c.urichars::isUnreserved) }
7876

7977

8078
/// Decodes %-escapes in the given string
@@ -93,112 +91,113 @@ def urldecode(s: String): String = collectString {
9391
}
9492
}
9593

94+
/// Builder-style representation of the parts of a URI.
95+
///
96+
/// Many consumers expect the operations to be called in order.
9697
interface URIBuilder {
98+
/// URI scheme, e.g. http, https, ftp, ...
9799
def scheme(s: String): Unit
100+
/// Userinfo part, e.g. for basic auth: user:letmein, ...
98101
def userinfo(a: String): Unit
102+
/// Hostname (or non-host authority part), e.g. effekt-lang.org, [::1], 127.0.0.1, ...
99103
def host(h: String): Unit
104+
/// Port, e.g. 80, 443, ...
100105
def port(p: Int): Unit
106+
/// Path part of the URI, commonly something like /index.html
101107
def path(p: String): Unit
108+
/// Query part of the URI, e.g. q=12 for ...?q=12
102109
def query(q: String): Unit
110+
/// Fragment part of the URI, e.g. a1 for ...#a1
103111
def fragment(f: String): Unit
104112
}
105113

106-
def parseScheme(): String / { Scan[Char], stop } = {
107-
with collectString
108-
do emit(readIf{ c => c.isAlphabetic })
109-
readWhile{ c => c.isAlphanumeric || c == '+' || c == '-' || c == '.' }
110-
}
111-
112-
def unread[A, R](c: A){ body: => R / Scan[A] }: R / Scan[A] = {
113-
var read = false
114-
try body() with Scan[A] {
115-
def peek() = if(read) { resume{do peek()} } else { resume{ () => c } }
116-
def skip() = if(read) { resume{do skip[A]()} } else { resume{read = true} }
114+
namespace internal {
115+
/// Reads a URI scheme from the scanner and returns it as a string:
/// one alphabetic character followed by any run of alphanumerics, `+`, `-` or `.`.
/// NOTE(review): presumably `readIf` raises `stop` when the first character is not alphabetic; confirm in scanner.
def parseScheme(): String / { Scan[Char], stop } = {
116+
with collectString
117+
do emit(readIf{ c => c.isAlphabetic })
118+
readWhile{ c => c.isAlphanumeric || c == '+' || c == '-' || c == '.' }
117119
}
118-
}
119-
def unread[R](s: String){ body: => R / Scan[Char] }: R / Scan[Char] = {
120-
var pos = 0
121-
try body() with Scan[Char] {
122-
def peek() = if (pos < s.length) { resume{s.unsafeCharAt(pos)} } else { resume{do peek()} }
123-
def skip() = if (pos < s.length) { resume{pos = pos + 1} } else { resume{do skip[Char]()} }
124-
}
125-
}
126120

127-
def parseHostAndPort(): Unit / { URIBuilder, Scan[Char] } = {
128-
try {
129-
do peek[Char]() match {
130-
case '[' => // IP-literal
131-
// this is more permissive than the spec
132-
do host(collectString{ readWhile{ c => c != ']' } } ++ "]")
133-
readIf(']')
134-
case _ =>
135-
do host(collectString{ readWhile{
136-
case '%' => true
137-
case c and c.isUnreserved => true
138-
case c and c.isSubDelim => true
139-
case _ => false
140-
} })
121+
def parseHostAndPort(): Unit / { URIBuilder, Scan[Char] } = {
122+
try {
123+
do peek[Char]() match {
124+
case '[' => // IP-literal
125+
// this is more permissive than the spec
126+
do host(collectString{ readWhile{ c => c != ']' } } ++ "]")
127+
readIf(']')
128+
case _ =>
129+
do host(collectString{ readWhile{
130+
case '%' => true
131+
case c and c.urichars::isUnreserved => true
132+
case c and c.urichars::isSubDelim => true
133+
case _ => false
134+
} })
135+
}
136+
} with stop { () =>
137+
do host("")
138+
}
139+
attempt{
140+
readIf(':')
141+
do port(readInteger())
142+
}{
143+
// no port
144+
()
141145
}
142-
} with stop { () =>
143-
do host("")
144-
}
145-
attempt{
146-
readIf(':')
147-
do port(readInteger())
148-
}{
149-
// no port
150-
()
151146
}
152-
}
153147

154-
def parseAuthority(): Unit / { URIBuilder, Scan[Char] } = {
155-
// try parsing as userinfo@...
156-
val fst = collectString{ readWhile{
157-
case '%' => true
158-
case ':' => true
159-
case c and c.isUnreserved => true
160-
case c and c.isSubDelim => true
161-
case _ => false
162-
} }
163-
attempt{ // was userinfo
164-
readIf('@')
165-
do userinfo(fst)
166-
parseHostAndPort()
167-
}{ // was not userinfo
168-
with unread(fst)
169-
parseHostAndPort()
148+
/// Parses the authority component of a URI.
///
/// First collects a run of `%`, `:`, unreserved and sub-delim characters. If an `@`
/// follows, that run is emitted as `userinfo` and host/port are parsed afterwards.
/// Otherwise the run is replayed with `unread(fst)` and parsed as host/port itself.
def parseAuthority(): Unit / { URIBuilder, Scan[Char] } = {
149+
// try parsing as userinfo@...
150+
val fst = collectString{ readWhile{
151+
case '%' => true
152+
case ':' => true
153+
case c and c.urichars::isUnreserved => true
154+
case c and c.urichars::isSubDelim => true
155+
case _ => false
156+
} }
157+
attempt{ // was userinfo
158+
readIf('@')
159+
do userinfo(fst)
160+
parseHostAndPort()
161+
}{ // was not userinfo
162+
with unread(fst) // replay the characters already consumed into `fst`
163+
parseHostAndPort()
164+
}
170165
}
171-
}
172166

173-
def parsePathQueryFragment(): Unit / { URIBuilder, Scan[Char], Exception[WrongFormat] } = {
174-
do path(collectString{ readWhile{
175-
case '?' => false
176-
case '#' => false
177-
case _ => true
178-
}})
179-
boundary{
180-
readIf('?')
181-
do query(collectString{ readWhile{ c => c != '#' }})
182-
}
183-
boundary{
184-
readIf('#')
185-
do fragment(collectString{ readWhile[Char]{ c => true } })
167+
/// Parses the rest of a URI after the scheme and optional authority.
///
/// Emits `path` with everything up to the first `?` or `#`, then `query`
/// (the text after `?`, up to `#`) and `fragment` (all text after `#`).
/// NOTE(review): presumably `boundary` absorbs a failing `readIf`, so query and
/// fragment are emitted only when their delimiter is present; confirm.
def parsePathQueryFragment(): Unit / { URIBuilder, Scan[Char], Exception[WrongFormat] } = {
168+
do path(collectString{ readWhile{
169+
case '?' => false
170+
case '#' => false
171+
case _ => true
172+
}})
173+
boundary{
174+
readIf('?')
175+
do query(collectString{ readWhile{ c => c != '#' }})
176+
}
177+
boundary{
178+
readIf('#')
179+
do fragment(collectString{ readWhile[Char]{ c => true } }) // the fragment takes all remaining input
180+
}
186181
}
187182
}
188183

184+
/// Parse a (non-relative) URI into its parts, causing the respective URIBuilder events.
185+
/// Should at least parse all RFC 3986-compliant URIs.
186+
///
187+
/// The authority is reported through `host` even when it is not a host name.
189188
def parseURI(uri: String): Unit / { URIBuilder, Exception[WrongFormat] } = {
190189
try {
191190
with feed(uri)
192191
with scanner[Char]
193192

194-
do scheme(parseScheme())
193+
do scheme(internal::parseScheme())
195194
readIf(':')
196195

197196
val c = read[Char]()
198197
if (c == '/' and do peek[Char]() == '/'){
199198
// starts with `//`
200199
readIf('/')
201-
parseAuthority()
200+
internal::parseAuthority()
202201
boundary{
203202
do peek[Char]() match {
204203
case '?' => ()
@@ -207,10 +206,10 @@ def parseURI(uri: String): Unit / { URIBuilder, Exception[WrongFormat] } = {
207206
case _ => do raise(WrongFormat(), "Path must be empty or start with / if there is an authority component.")
208207
}
209208
}
210-
parsePathQueryFragment()
209+
internal::parsePathQueryFragment()
211210
} else {
212211
with unread(c)
213-
parsePathQueryFragment()
212+
internal::parsePathQueryFragment()
214213
}
215214
} with stop { () =>
216215
do raise(WrongFormat(), "Could not parse URI")

libraries/common/scanner.effekt

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,25 @@ def readInteger(): Int / Scan[Char] =
118118
readDecimal()
119119
}
120120

121+
/// Handle Scan[A] in the body to first read the given element `c`,
122+
/// then continue with the outside scanner.
123+
def unread[A, R](c: A){ body: => R / Scan[A] }: R / Scan[A] = {
124+
var read = false // becomes true once the body has skipped `c`
125+
try body() with Scan[A] {
126+
def peek() = if(read) { resume{do peek()} } else { resume{ () => c } } // until `c` is skipped, peek yields `c`
127+
def skip() = if(read) { resume{do skip[A]()} } else { resume{read = true} } // the first skip consumes only `c`
128+
}
129+
}
130+
131+
/// Handle Scan[Char] in the body to first read the characters of the given string `s`,
132+
/// then continue with the outside scanner.
133+
def unread[R](s: String){ body: => R / Scan[Char] }: R / Scan[Char] = {
134+
var pos = 0 // index of the next character of `s` still to be replayed
135+
try body() with Scan[Char] {
136+
def peek() = if (pos < s.length) { resume{s.unsafeCharAt(pos)} } else { resume{do peek()} } // replay `s` first, then defer to the outer scanner
137+
def skip() = if (pos < s.length) { resume{pos = pos + 1} } else { resume{do skip[Char]()} } // advance within `s` before skipping outer input
138+
}
139+
}
121140

122141
namespace returning {
123142

0 commit comments

Comments
 (0)