From 0e5f07558c137e1c33536a26398eec31fc2becef Mon Sep 17 00:00:00 2001 From: Sofia Rodrigues Date: Fri, 23 Jan 2026 20:02:48 -0300 Subject: [PATCH 01/30] feat: introduce data type for HTTP --- src/Std/Internal/Http/Data.lean | 1 + src/Std/Internal/Http/Data/URI.lean | 75 ++ src/Std/Internal/Http/Data/URI/Basic.lean | 729 ++++++++++++++++ src/Std/Internal/Http/Data/URI/Encoding.lean | 575 +++++++++++++ src/Std/Internal/Http/Data/URI/Parser.lean | 399 +++++++++ tests/lean/run/async_http_uri.lean | 825 +++++++++++++++++++ 6 files changed, 2604 insertions(+) create mode 100644 src/Std/Internal/Http/Data/URI.lean create mode 100644 src/Std/Internal/Http/Data/URI/Basic.lean create mode 100644 src/Std/Internal/Http/Data/URI/Encoding.lean create mode 100644 src/Std/Internal/Http/Data/URI/Parser.lean create mode 100644 tests/lean/run/async_http_uri.lean diff --git a/src/Std/Internal/Http/Data.lean b/src/Std/Internal/Http/Data.lean index db2de60eb6bc..653e05915346 100644 --- a/src/Std/Internal/Http/Data.lean +++ b/src/Std/Internal/Http/Data.lean @@ -13,6 +13,7 @@ public import Std.Internal.Http.Data.Response public import Std.Internal.Http.Data.Status public import Std.Internal.Http.Data.Chunk public import Std.Internal.Http.Data.Headers +public import Std.Internal.Http.Data.URI /-! # HTTP Data Types diff --git a/src/Std/Internal/Http/Data/URI.lean b/src/Std/Internal/Http/Data/URI.lean new file mode 100644 index 000000000000..09a808ea3f2b --- /dev/null +++ b/src/Std/Internal/Http/Data/URI.lean @@ -0,0 +1,75 @@ +/- +Copyright (c) 2025 Lean FRO, LLC. All rights reserved. +Released under Apache 2.0 license as described in the file LICENSE. +Authors: Sofia Rodrigues +-/ +module + +prelude +public import Std.Internal.Http.Data.URI.Basic +public import Std.Internal.Http.Data.URI.Parser + +public section + +/-! +# URI + +This module defines the `URI` and `RequestTarget` types that represent and manipulate components of +URIs as defined by RFC 3986. It provides parsing, rendering, and normalization utilities for working +with URIs and request targets in HTTP messages. +-/ + +namespace Std.Http.RequestTarget + +set_option linter.all true + +/-- +Attempts to parse a `RequestTarget` from the given string. +-/ +@[inline] +def parse? (string : String) : Option RequestTarget := + (URI.Parser.parseRequestTarget <* Std.Internal.Parsec.eof).run string.toUTF8 |>.toOption + +/-- +Parses a `RequestTarget` from the given string. Panics if parsing fails. Use `parse?` +if you need a safe option-returning version. +-/ +@[inline] +def parse! (string : String) : RequestTarget := + match parse? string with + | some res => res + | none => panic! "invalid request target" + +/-- +Creates an origin-form request target from a path string. +The path should start with '/' (e.g., "/api/users" or "/search?q=test"). +Panics if the string is not a valid origin-form request target. +-/ +@[inline] +def originForm! (path : String) : RequestTarget := + match parse? path with + | some (.originForm p q f) => .originForm p q f + | _ => panic! s!"invalid origin-form request target: {path}" + +end RequestTarget + +namespace URI + +/-- +Attempts to parse a `URI` from the given string. +-/ +@[inline] +def parse? (string : String) : Option URI := + (Parser.parseURI <* Std.Internal.Parsec.eof).run string.toUTF8 |>.toOption + +/-- +Parses a `URI` from the given string. Panics if parsing fails. Use `parse?` if you need a safe +option-returning version. +-/ +@[inline] +def parse! (string : String) : URI := + match parse? string with + | some res => res + | none => panic! "invalid URI" + +end URI diff --git a/src/Std/Internal/Http/Data/URI/Basic.lean b/src/Std/Internal/Http/Data/URI/Basic.lean new file mode 100644 index 000000000000..57a5c87f3d29 --- /dev/null +++ b/src/Std/Internal/Http/Data/URI/Basic.lean @@ -0,0 +1,729 @@ +/- +Copyright (c) 2025 Lean FRO, LLC. All rights reserved. +Released under Apache 2.0 license as described in the file LICENSE. +Authors: Sofia Rodrigues +-/ +module + +prelude +public import Std.Net +public import Std.Internal.Http.Data.URI.Encoding +public import Std.Internal.Http.Internal + +public section + +/-! +# URI Structure + +This module defines the complete URI structure following RFC 3986, including schemes, authorities, +paths, queries, fragments, and request targets. + +All text components use the encoding types from `Std.Http.URI.Encoding` to ensure proper +percent-encoding is maintained throughout. +-/ + +namespace Std.Http + +set_option linter.all true + +open Internal + +namespace URI + +/-- +URI scheme identifier (e.g., "http", "https", "ftp"). +-/ +abbrev Scheme := { s : String // IsLowerCase s } + +instance : Inhabited Scheme where + default := ⟨"", .isLowerCase_empty⟩ + +/-- +User information component containing the username and optional password. Both fields store decoded +(unescaped) values. +-/ +structure UserInfo where + /-- + The username (decoded). + -/ + username : String + + /-- + The optional password (decoded). + -/ + password : Option String +deriving Inhabited, Repr + +/-- +? +-/ +abbrev DomainName := { s : String // IsLowerCase s } + +/-- +Host component of a URI, supporting domain names and IP addresses. +-/ +inductive Host + /-- + A domain name (lowercase-normalized). + -/ + | name (name : DomainName) + + /-- + An IPv4 address. + -/ + | ipv4 (ipv4 : Net.IPv4Addr) + + /-- + An IPv6 address. + -/ + | ipv6 (ipv6 : Net.IPv6Addr) +deriving Inhabited + +instance : Repr Host where + reprPrec x prec := + let nestPrec := (if prec ≥ 1024 then 1 else 2) + let name := "Std.Http.URI.Host" + + let repr (ctr : String) a := + Repr.addAppParen (Format.nest nestPrec (.text s!"{name}.{ctr}" ++ .line ++ a)).group prec + + match x with + | Host.name a => repr "name" (reprArg a) + | Host.ipv4 a => repr "ipv4" (toString a) + | Host.ipv6 a => repr "ipv6" (toString a) + +instance : ToString Host where + toString + | .name n => n + | .ipv4 addr => toString addr + | .ipv6 addr => s!"[{toString addr}]" + +/-- +TCP port number. +-/ +abbrev Port := UInt16 + +/-- +The authority component of a URI, identifying the network location of the resource. + +Reference: https://www.rfc-editor.org/rfc/rfc3986.html#section-3.2 +-/ +structure Authority where + /-- + Optional user information (username and password). + -/ + userInfo : Option UserInfo := none + + /-- + The host identifying the network location. + -/ + host : Host + + /-- + Optional port number for connecting to the host. + -/ + port : Option Port := none +deriving Inhabited, Repr + +instance : ToString Authority where + toString auth := + let userPart := match auth.userInfo with + | none => "" + | some ⟨name, some pass⟩ => s!"{toString name}:{toString pass}@" + | some ⟨name, none⟩ => s!"{toString name}@" + let hostPart := toString auth.host + let portPart := match auth.port with + | none => "" + | some p => s!":{p}" + s!"{userPart}{hostPart}{portPart}" + +namespace Authority +end Authority + +/-- +Hierarchical path component of a URI. Each segment is stored as an `EncodedString` to maintain +proper percent-encoding. +-/ +structure Path where + /-- + The path segments making up the hierarchical structure (each segment is percent-encoded). + -/ + segments : Array EncodedString + + /-- + Whether the path is absolute (begins with '/') or relative. + -/ + absolute : Bool +deriving Inhabited, Repr + +instance : ToString Path where + toString path := + let result := String.intercalate "/" (path.segments.map toString).toList + if path.absolute then "/" ++ result else result + +namespace Path + +/-- +Returns true if the path has no segments. +-/ +def isEmpty (p : Path) : Bool := p.segments.isEmpty + +/-- +Returns the parent path by removing the last segment. If the path is empty, returns the path unchanged. +-/ +def parent (p : Path) : Path := + if p.segments.isEmpty then p + else { p with segments := p.segments.pop } + +/-- +Joins two paths. If the second path is absolute, it is returned as-is. Otherwise, the second path's +segments are appended to the first path. +-/ +def join (p1 : Path) (p2 : Path) : Path := + if p2.absolute then p2 + else { p1 with segments := p1.segments ++ p2.segments } + +/-- +Appends a single segment to the path. The segment will be percent-encoded. +-/ +def append (p : Path) (segment : String) : Path := + { p with segments := p.segments.push (EncodedString.encode segment) } + +/-- +Appends an already-encoded segment to the path. +-/ +def appendEncoded (p : Path) (segment : EncodedString) : Path := + { p with segments := p.segments.push segment } + +/-- +Removes dot segments from the path according to RFC 3986 Section 5.2.4. This handles "." +(current directory) and ".." (parent directory) segments. +-/ +def normalize (p : Path) : Path := + let rec loop (input : List EncodedString) (output : List EncodedString) : List EncodedString := + match input with + | [] => + output.reverse + | segStr :: rest => + if toString segStr == "." then + loop rest output + else if toString segStr == ".." then + match output with + | [] => loop rest [] + | _ :: tail => loop rest tail + else + loop rest (segStr :: output) + + { p with segments := (loop p.segments.toList []).toArray } + +/-- +Returns the path segments as decoded strings. +Segments that cannot be decoded as UTF-8 are returned as their raw encoded form. +-/ +def toDecodedSegments (p : Path) : Array String := + p.segments.map fun seg => + seg.decode.getD (toString seg) + +end Path + +/-- +Query string represented as an array of key-value pairs. Both keys and values are stored as +`EncodedQueryString` for proper application/x-www-form-urlencoded encoding. Values are optional to +support parameters without values (e.g., "?flag"). Order is preserved based on insertion order. +-/ +@[expose] +def Query := Array (EncodedQueryString × Option EncodedQueryString) +deriving Repr, Inhabited + +namespace Query + +/-- +Extracts all unique query parameter names. +-/ +@[expose] +def names (query : Query) : Array EncodedQueryString := + query.map (fun p => p.fst) + |> Array.toList + |> List.eraseDups + |> List.toArray + +/-- +Extracts all query parameter values. +-/ +@[expose] +def values (query : Query) : Array (Option EncodedQueryString) := + query.map (fun p => p.snd) + +/-- +Returns the query as an array of (key, value) pairs. This is an identity function since Query is +already an array of pairs. +-/ +@[expose] +def toArray (query : Query) : Array (EncodedQueryString × Option EncodedQueryString) := + query + +/-- +Formats a query parameter as a string in the format "key" or "key=value". The key and value are +already percent-encoded as `EncodedQueryString`. +-/ +def formatQueryParam (key : EncodedQueryString) (value : Option EncodedQueryString) : String := + match value with + | none => toString key + | some v => s!"{toString key}={toString v}" + +/-- +Finds the first value of a query parameter by key name. Returns `none` if the key is not found. +The value remains encoded as `EncodedQueryString`. +-/ +def find? (query : Query) (key : String) : Option (Option EncodedQueryString) := + let encodedKey := EncodedQueryString.encode key + let matchingKey := Array.find? (fun x => x.fst.toByteArray = encodedKey.toByteArray) query + matchingKey.map (fun x => x.snd) + +/-- +Finds all values of a query parameter by key name. Returns an empty array if the key is not found. +The values remain encoded as `EncodedQueryString`. +-/ +def findAll (query : Query) (key : String) : Array (Option EncodedQueryString) := + let encodedKey := EncodedQueryString.encode key + query.filterMap (fun x => + if x.fst.toByteArray = encodedKey.toByteArray then + some (x.snd) + else none) + +/-- +Adds a query parameter to the query string. +-/ +def insert (query : Query) (key : String) (value : String) : Query := + let encodedKey := EncodedQueryString.encode key + let encodedValue := EncodedQueryString.encode value + query.push (encodedKey, some encodedValue) + +/-- +Adds a query parameter to the query string. +-/ +def insertEncoded (query : Query) (key : EncodedQueryString) (value : Option EncodedQueryString) : Query := + query.push (key, value) + +/-- +Creates an empty query string. +-/ +def empty : Query := #[] + +/-- +Creates a query string from a list of key-value pairs. +-/ +def ofList (pairs : List (EncodedQueryString × Option EncodedQueryString)) : Query := + pairs.toArray + +/-- +Checks if a query parameter exists. +-/ +def contains (query : Query) (key : String) : Bool := + let encodedKey := EncodedQueryString.encode key + query.any (fun x => x.fst.toByteArray = encodedKey.toByteArray) + +/-- +Removes all occurrences of a query parameter by key name. +-/ +def erase (query : Query) (key : String) : Query := + let encodedKey := EncodedQueryString.encode key + -- Filter out matching keys + query.filter (fun x => x.fst.toByteArray ≠ encodedKey.toByteArray) + +/-- +Gets the first value of a query parameter by key name, decoded as a string. +Returns `none` if the key is not found or if the value cannot be decoded as UTF-8. +-/ +def get (query : Query) (key : String) : Option String := + match query.find? key with + | none => none + | some none => some "" -- Key exists but has no value + | some (some encoded) => encoded.decode + +/-- +Gets the first value of a query parameter by key name, decoded as a string. +Returns the default value if the key is not found or if the value cannot be decoded. +-/ +def getD (query : Query) (key : String) (default : String) : String := + query.get key |>.getD default + +/-- +Sets a query parameter, replacing all existing values for that key. +Both key and value will be automatically percent-encoded. +-/ +def set (query : Query) (key : String) (value : String) : Query := + query.erase key |>.insert key value + +/-- +Converts the query to a properly encoded query string format. +Example: "key1=value1&key2=value2&flag" +-/ +def toRawString (query : Query) : String := + let params := query.map (fun (k, v) => formatQueryParam k v) + String.intercalate "&" params.toList + +instance : EmptyCollection Query := + ⟨Query.empty⟩ + +instance : Singleton (String × String) Query := + ⟨fun ⟨k, v⟩ => Query.empty.insert k v⟩ + +instance : Insert (String × String) Query := + ⟨fun ⟨k, v⟩ q => q.insert k v⟩ + +instance : ToString Query where + toString q := + if q.isEmpty then "" else + let encodedParams := q.toList.map fun (key, value) => + Query.formatQueryParam key value + "?" ++ String.intercalate "&" encodedParams + +end Query + +end URI + +/-- +Complete URI structure following RFC 3986. All text components use encoded string types to ensure +proper percent-encoding. + +Reference: https://www.rfc-editor.org/rfc/rfc3986.html +-/ +structure URI where + /-- + The URI scheme (e.g., "http", "https", "ftp"). + -/ + scheme : URI.Scheme + + /-- + Optional authority component (user info, host, and port). + -/ + authority : Option URI.Authority + + /-- + The hierarchical path component. + -/ + path : URI.Path + + /-- + Optional query string as key-value pairs. + -/ + query : URI.Query + + /-- + Optional fragment identifier (the part after '#'), percent-encoded. + -/ + fragment : Option String +deriving Repr, Inhabited + +instance : ToString URI where + toString uri := + let schemePart := uri.scheme + let authorityPart := match uri.authority with + | none => "" + | some auth => s!"//{toString auth}" + let pathPart := toString uri.path + let queryPart := toString uri.query + let fragmentPart := uri.fragment.map (fun f => "#" ++ toString (URI.EncodedString.encode f)) |>.getD "" + s!"{schemePart}:{authorityPart}{pathPart}{queryPart}{fragmentPart}" + +namespace URI + +/-- +Fluent builder for constructing URIs. Takes raw (unencoded) strings and handles encoding +automatically when building the final URI. +-/ +structure Builder where + /-- + The URI scheme (e.g., "http", "https"). + -/ + scheme : Option String := none + + /-- + User information (username and optional password). + -/ + userInfo : Option UserInfo := none + + /-- + The host component. + -/ + host : Option Host := none + + /-- + The port number. + -/ + port : Option URI.Port := none + + /-- + Path segments (will be encoded when building). + -/ + pathSegments : Array String := #[] + + /-- + Query parameters as (key, optional value) pairs (will be encoded when building). + -/ + query : Array (String × Option String) := #[] + + /-- + Fragment identifier (will be encoded when building). + -/ + fragment : Option String := none +deriving Inhabited + +namespace Builder + +/-- +Creates an empty URI builder. +-/ +def empty : Builder := {} + +/-- +Sets the URI scheme (e.g., "http", "https"). +-/ +def setScheme (b : Builder) (scheme : String) : Builder := + { b with scheme := some scheme } + +/-- +Sets the user information with username and optional password. +The strings will be automatically percent-encoded. +-/ +def setUserInfo (b : Builder) (username : String) (password : Option String := none) : Builder := + { b with userInfo := some { + username := username + password := password + } + } + +/-- +Sets the host as a domain name. +The domain name will be automatically percent-encoded. +-/ +def setHost (b : Builder) (name : String) : Builder := + { b with host := some (Host.name ⟨name.toLower, IsLowerCase.isLowerCase_toLower⟩) } + +/-- +Sets the host as an IPv4 address. +-/ +def setHostIPv4 (b : Builder) (addr : Net.IPv4Addr) : Builder := + { b with host := some (Host.ipv4 addr) } + +/-- +Sets the host as an IPv6 address. +-/ +def setHostIPv6 (b : Builder) (addr : Net.IPv6Addr) : Builder := + { b with host := some (Host.ipv6 addr) } + +/-- +Sets the port number. +-/ +def setPort (b : Builder) (port : Port) : Builder := + { b with port := some port } + +/-- +Replaces all path segments. Segments will be automatically percent-encoded when building. +-/ +def setPath (b : Builder) (segments : Array String) : Builder := + { b with pathSegments := segments } + +/-- +Appends a single segment to the path. The segment will be automatically percent-encoded when building. +-/ +def appendPathSegment (b : Builder) (segment : String) : Builder := + { b with pathSegments := b.pathSegments.push segment } + +/-- +Adds a query parameter with a value. Both key and value will be automatically percent-encoded when +building. +-/ +def addQueryParam (b : Builder) (key : String) (value : String) : Builder := + { b with query := b.query.push (key, some value) } + +/-- +Adds a query parameter without a value (flag parameter). The key will be automatically +percent-encoded when building. +-/ +def addQueryFlag (b : Builder) (key : String) : Builder := + { b with query := b.query.push (key, none) } + +/-- +Replaces all query parameters. Keys and values will be automatically percent-encoded when building. +-/ +def setQuery (b : Builder) (query : Array (String × Option String)) : Builder := + { b with query := query } + +/-- +Sets the fragment identifier. The fragment will be automatically percent-encoded when building. +-/ +def setFragment (b : Builder) (fragment : String) : Builder := + { b with fragment := some fragment } + +/-- +Builds a complete URI from the builder state, encoding all components. Defaults to "https" scheme if +none is specified. +-/ +def build (b : Builder) : URI := + let scheme := b.scheme.getD "https" + + let authority := + if b.host.isSome then + some { + userInfo := b.userInfo + host := b.host.getD default + port := b.port + } + else none + + let path : Path := { + segments := b.pathSegments.map EncodedString.encode + absolute := true + } + + let query := + b.query.map fun (k, v) => + (EncodedQueryString.encode k, v.map EncodedQueryString.encode) + + let query := URI.Query.ofList query.toList + + { + scheme := ⟨scheme.toLower, IsLowerCase.isLowerCase_toLower⟩ + authority := authority + path + query := query + fragment := b.fragment + } + +end Builder + +end URI + +namespace URI + +/-- +Returns a new URI with the scheme replaced. +-/ +def withScheme (uri : URI) (scheme : String) : URI := + { uri with scheme := ⟨scheme.toLower, IsLowerCase.isLowerCase_toLower⟩ } + +/-- +Returns a new URI with the authority replaced. +-/ +def withAuthority (uri : URI) (authority : Option URI.Authority) : URI := + { uri with authority } + +/-- +Returns a new URI with the path replaced. +-/ +def withPath (uri : URI) (path : URI.Path) : URI := + { uri with path } + +/-- +Returns a new URI with the query replaced. +-/ +def withQuery (uri : URI) (query : URI.Query) : URI := + { uri with query } + +/-- +Returns a new URI with the fragment replaced. +-/ +def withFragment (uri : URI) (fragment : Option String) : URI := + { uri with fragment } + +/-- +Normalizes a URI according to RFC 3986 Section 6. +-/ +def normalize (uri : URI) : URI := + { uri with + scheme := uri.scheme + authority := uri.authority + path := uri.path.normalize + } + +end URI + +/-- +HTTP request target forms as defined in RFC 7230 Section 5.3. + +Reference: https://www.rfc-editor.org/rfc/rfc7230.html#section-5.3 +-/ +inductive RequestTarget where + /-- + Origin-form request target (most common for HTTP requests). Consists of a path, optional query string, + and optional fragment. + Example: `/path/to/resource?key=value#section` + -/ + | originForm (path : URI.Path) (query : Option URI.Query) (fragment : Option String) + + /-- + Absolute-form request target containing a complete URI. Used when making requests through a proxy. + Example: `http://example.com:8080/path?key=value` + -/ + | absoluteForm (uri : URI) + + /-- + Authority-form request target (used for CONNECT requests). + Example: `example.com:443` + -/ + | authorityForm (authority : URI.Authority) + + /-- + Asterisk-form request target (used with OPTIONS requests). + Example: `*` + -/ + | asteriskForm +deriving Inhabited, Repr + +namespace RequestTarget + +/-- +Extracts the path component from a request target, if available. +Returns an empty relative path for targets without a path. +-/ +def path : RequestTarget → URI.Path + | .originForm p _ _ => p + | .absoluteForm u => u.path + | _ => { segments := #[], absolute := false } + +/-- +Extracts the query component from a request target, if available. +Returns an empty array for targets without a query. +-/ +def query : RequestTarget → URI.Query + | .originForm _ q _ => q.getD URI.Query.empty + | .absoluteForm u => u.query + | _ => URI.Query.empty + +/-- +Extracts the authority component from a request target, if available. +-/ +def authority? : RequestTarget → Option URI.Authority + | .authorityForm a => some a + | .absoluteForm u => u.authority + | _ => none + +/-- +Extracts the fragment component from a request target, if available. +-/ +def fragment? : RequestTarget → Option String + | .originForm _ _ frag => frag + | .absoluteForm u => u.fragment + | _ => none + +/-- +Extracts the full URI if the request target is in absolute form. +-/ +def uri? : RequestTarget → Option URI + | .absoluteForm u => some u + | _ => none + +instance : ToString RequestTarget where + toString + | .originForm path query frag => + let pathStr := toString path + let queryStr := query.map toString |>.getD "" + let frag := frag.map (fun f => "#" ++ toString (URI.EncodedString.encode f)) |>.getD "" + s!"{pathStr}{queryStr}{frag}" + | .absoluteForm uri => toString uri + | .authorityForm auth => toString auth + | .asteriskForm => "*" + +end Std.Http.RequestTarget diff --git a/src/Std/Internal/Http/Data/URI/Encoding.lean b/src/Std/Internal/Http/Data/URI/Encoding.lean new file mode 100644 index 000000000000..4be4660717ea --- /dev/null +++ b/src/Std/Internal/Http/Data/URI/Encoding.lean @@ -0,0 +1,575 @@ +/- +Copyright (c) 2025 Lean FRO, LLC. All rights reserved. +Released under Apache 2.0 license as described in the file LICENSE. +Authors: Sofia Rodrigues +-/ +module + +prelude +import Init.Grind +public import Init.Data.String + +public section + +namespace Std.Http.URI + +set_option linter.all true + +/-! +# URI Encoding + +This module provides utilities for percent-encoding URI components according to RFC 3986. It includes +character validation, encoding/decoding functions, and types that maintain encoding invariants through +Lean's dependent type system. +-/ + +/-- +Checks if a byte represents an ASCII character (value < 128). +-/ +def isAscii (c : UInt8) : Bool := + c < 128 + +/-- +Checks if a byte is a hexadecimal digit (0-9, a-f, or A-F). Note: This accepts both lowercase and +uppercase hex digits. +-/ +def isHexDigit (c : UInt8) : Bool := + (c ≥ '0'.toUInt8 && c ≤ '9'.toUInt8) || + (c ≥ 'a'.toUInt8 && c ≤ 'f'.toUInt8) || + (c ≥ 'A'.toUInt8 && c ≤ 'F'.toUInt8) + +/-- +Checks if a byte is an alphanumeric digit (0-9, a-z, or A-Z). +-/ +def isAlphaNum (c : UInt8) : Bool := + (c ≥ '0'.toUInt8 && c ≤ '9'.toUInt8) || + (c ≥ 'a'.toUInt8 && c ≤ 'z'.toUInt8) || + (c ≥ 'A'.toUInt8 && c ≤ 'Z'.toUInt8) + +/-- +Checks if a byte is an unreserved character according to RFC 3986. Unreserved characters are: +alphanumeric, hyphen, period, underscore, and tilde. +-/ +def isUnreserved (c : UInt8) : Bool := + isAlphaNum c || + (c = '-'.toUInt8 || c = '.'.toUInt8 || c = '_'.toUInt8 || c = '~'.toUInt8) + +/-- +Checks if a byte is a valid character in a percent-encoded URI component. Valid characters are +unreserved characters or the percent sign (for escape sequences). +-/ +def isEncodedChar (c : UInt8) : Bool := + isUnreserved c || c = '%'.toUInt8 + +/-- +Checks if a byte is valid in a percent-encoded query string component. Extends `isEncodedChar` to also +allow '+' which represents space in application/x-www-form-urlencoded format. +-/ +def isEncodedQueryChar (c : UInt8) : Bool := + isEncodedChar c || c = '+'.toUInt8 + +/-- +Checks if all characters in a `ByteArray` are allowed in an encoded URI component. This is a fast check +that only verifies the character set, not full encoding validity. +-/ +@[inline] +abbrev isAllowedEncodedChars (s : ByteArray) : Prop := + s.data.all isEncodedChar + +instance : Decidable (isAllowedEncodedChars s) := + inferInstanceAs (Decidable (s.data.all isEncodedChar = true)) + +/-- +Checks if all characters in a `ByteArray` are allowed in an encoded query parameter. Allows '+' as an +alternative encoding for space (application/x-www-form-urlencoded). +-/ +@[inline] +abbrev isAllowedEncodedQueryChars (s : ByteArray) : Prop := + s.data.all isEncodedQueryChar + +instance : Decidable (isAllowedEncodedQueryChars s) := + inferInstanceAs (Decidable (s.data.all isEncodedQueryChar = true)) + +/-- +Validates that all percent signs in a byte array are followed by exactly two hexadecimal digits. +This ensures proper percent-encoding according to RFC 3986. + +For example: +- `%20` is valid (percent followed by two hex digits) +- `%` is invalid (percent with no following digits) +- `%2` is invalid (percent followed by only one digit) +- `%GG` is invalid (percent followed by non-hex characters) +-/ +def isValidPercentEncoding (ba : ByteArray) : Bool := + let rec loop (i : Nat) : Bool := + if h : i < ba.size then + let c := ba[i]'h + if c = '%'.toUInt8 then + if h₂ : i + 2 < ba.size then + let d1 := ba[i + 1]'(by omega) + let d2 := ba[i + 2]'h₂ + if isHexDigit d1 && isHexDigit d2 then + loop (i + 3) + else false + else false + else loop (i + 1) + else true + termination_by ba.size - i + loop 0 + +/-- +Converts a nibble (4-bit value, 0-15) to its hexadecimal digit representation. Returns '0'-'9' for +values 0-9, and 'A'-'F' for values 10-15. +-/ +def hexDigit (n : UInt8) : UInt8 := + if n < 10 then (n + '0'.toUInt8) + else (n - 10 + 'A'.toUInt8) + +/-- +Converts a hexadecimal digit character to its numeric value (0-15). +Returns `none` if the character is not a valid hex digit. +-/ +def hexDigitToUInt8? (c : UInt8) : Option UInt8 := + if c ≥ '0'.toUInt8 && c ≤ '9'.toUInt8 then + some (c - '0'.toUInt8) + else if c ≥ 'a'.toUInt8 && c ≤ 'f'.toUInt8 then + some (c - 'a'.toUInt8 + 10) + else if c ≥ 'A'.toUInt8 && c ≤ 'F'.toUInt8 then + some (c - 'A'.toUInt8 + 10) + else + none + +theorem isAllowedEncodedChars.push {bs : ByteArray} (h : isAllowedEncodedChars bs) (h₁ : isEncodedChar c) : + isAllowedEncodedChars (bs.push c) := by + simpa [isAllowedEncodedChars, ByteArray.push, Array.all_push, And.intro h h₁] + +theorem isAllowedEncodedQueryChars.push {bs : ByteArray} (h : isAllowedEncodedQueryChars bs) (h₁ : isEncodedQueryChar c) : + isAllowedEncodedQueryChars (bs.push c) := by + simpa [isAllowedEncodedQueryChars, ByteArray.push, Array.all_push, And.intro h h₁] + +theorem isAlphaNum_isAscii {c : UInt8} (h : isAlphaNum c) : isAscii c := by + unfold isAlphaNum isAscii at * + simp at h + rcases h with ⟨h1, h2⟩ + next => simp; exact Nat.lt_of_le_of_lt h2 (by decide) + next h => simp; exact Nat.lt_of_le_of_lt h.2 (by decide) + next h => simp; exact Nat.lt_of_le_of_lt h.2 (by decide) + +theorem isEncodedChar_isAscii (c : UInt8) (h : isEncodedChar c) : isAscii c := by + unfold isEncodedChar isUnreserved at * + cases h' : isAlphaNum c + · simp [h'] at *; rcases h with ⟨h, h⟩ | h | h | h <;> (subst_vars; decide) + · simp [h'] at h; exact (isAlphaNum_isAscii h') + +theorem isEncodedQueryChar_isAscii (c : UInt8) (h : isEncodedQueryChar c) : isAscii c := by + unfold isEncodedQueryChar isAscii at * + simp at h + rcases h + next h => exact isEncodedChar_isAscii c h + next h => subst_vars; decide + +theorem hexDigit_isHexDigit (h₀ : x < 16) : isHexDigit (hexDigit x) := by + unfold hexDigit isHexDigit + have h₁ : x.toNat < 16 := h₀ + split <;> simp [Char.toUInt8] + + next p => + have h₂ : x.toNat < 10 := p + have h₂ : 48 ≤ x.toNat + 48 := by omega + have h₃ : x.toNat + 48 ≤ 57 := by omega + have h₄ : x.toNat + 48 < 256 := by omega + + refine Or.inl (Or.inl ⟨?_, ?_⟩) + · exact (UInt8.ofNat_le_iff_le (by decide) h₄ |>.mpr h₂) + · exact (UInt8.ofNat_le_iff_le h₄ (by decide) |>.mpr h₃) + + next p => + have h₂ : ¬(x.toNat < 10) := p + have h₃ : 65 ≤ x.toNat - 10 + 65 := by omega + have h₅ : x.toNat - 10 + 65 ≤ 70 := by omega + have h₄ : x.toNat - 10 + 65 < 256 := by omega + + refine Or.inr ⟨?_, ?_⟩ + · simpa [UInt8.ofNat_sub (by omega : 10 ≤ x.toNat)] using + UInt8.ofNat_le_iff_le (by decide : 65 < 256) h₄ |>.mpr h₃ + · simpa [UInt8.ofNat_add, UInt8.ofNat_sub (by omega : 10 ≤ x.toNat)] using + UInt8.ofNat_le_iff_le h₄ (by decide : 70 < 256) |>.mpr h₅ + +theorem isHexDigit_isAlphaNum {c : UInt8} (h : isHexDigit c) : isAlphaNum c := by + unfold isHexDigit isAlphaNum at * + simp at h ⊢ + rcases h with ⟨h1, h2⟩ + next => exact Or.inl (Or.inl ⟨h1, h2⟩) + next h => exact Or.inl (Or.inr ⟨h.1, Nat.le_trans h.2 (by decide)⟩) + next h => exact Or.inr ⟨h.1, Nat.le_trans h.2 (by decide)⟩ + +theorem isAlphaNum_isEncodedChar {c : UInt8} (h : isAlphaNum c) : isEncodedChar c := by + unfold isEncodedChar isUnreserved + simp at * + exact Or.inl (Or.inl h) + +theorem isAlphaNum_isEncodedQueryChar {c : UInt8} (h : isAlphaNum c) : isEncodedQueryChar c := by + unfold isEncodedQueryChar isEncodedChar isUnreserved + simp at * + exact Or.inl (Or.inl (Or.inl h)) + +theorem isHexDigit_isEncodedChar {c : UInt8} (h : isHexDigit c) : isEncodedChar c := + isAlphaNum_isEncodedChar (isHexDigit_isAlphaNum h) + +theorem isHexDigit_isEncodedQueryChar {c : UInt8} (h : isHexDigit c) : isEncodedQueryChar c := + isAlphaNum_isEncodedQueryChar (isHexDigit_isAlphaNum h) + +theorem all_of_all_of_imp {b : ByteArray} (h : b.data.all p) (imp : ∀ c, p c → q c) : b.data.all q := by + rw [Array.all_eq] at * + simp at * + intro i x + exact (imp b.data[i]) (h i x) + +theorem autf8EncodeChar_flatMap_ascii {a : List UInt8} + (is_ascii_list : ∀ (x : UInt8), x ∈ a → x < 128) : + List.flatMap (fun a => String.utf8EncodeChar (Char.ofUInt8 a)) a = a := by + have h_encode {i : UInt8} (h : i < 128) : String.utf8EncodeChar (Char.ofUInt8 i) = [i] := by + simp [Char.ofUInt8, String.utf8EncodeChar, show ¬127 < i.toNat from Nat.not_lt_of_le (Nat.le_pred_of_lt h)] + induction a with + | nil => simp + | cons head tail ih => + simp [List.flatMap_cons] + rw [h_encode] + · simp + rw [ih] + intro x hx + exact is_ascii_list x (by simp [hx]) + · exact is_ascii_list head (by simp) + +theorem List.toByteArray_loop_eq (xs : List UInt8) (acc : ByteArray) : + (List.toByteArray.loop xs acc).data = acc.data ++ xs.toArray := by + induction xs generalizing acc with + | nil => simp [List.toByteArray.loop] + | cons x xs ih => simp [List.toByteArray.loop, ih, Array.push] + +theorem ByteArray.toList_toByteArray (ba : ByteArray) : + ba.data.toList.toByteArray = ba := by + cases ba with + | mk data => + simp [List.toByteArray] + apply ByteArray.ext + simp [List.toByteArray_loop_eq, ByteArray.empty] + decide + +theorem ascii_is_valid_utf8 (ba : ByteArray) (s : ba.data.all isAscii) : ByteArray.IsValidUTF8 ba := by + refine ⟨ba.data.toList.map Char.ofUInt8, ?_⟩ + rw [List.utf8Encode] + simp only [List.flatMap_map] + have is_ascii : ∀ (x : UInt8), x ∈ ba.data.toList → x < 128 := by + let is_ascii := Array.all_eq_true_iff_forall_mem.mp s + simp [isAscii] at is_ascii + intro x hx + exact is_ascii x (by simp_all) + rw [autf8EncodeChar_flatMap_ascii is_ascii] + exact ByteArray.toList_toByteArray ba |>.symm + +/-- +A percent-encoded URI component with a compile-time proof that it contains only valid encoded characters. +This provides type-safe URI encoding without runtime validation. + +The invariant guarantees that the string contains only unreserved characters (alphanumeric, hyphen, period, +underscore, tilde) and percent signs (for escape sequences). +-/ +structure EncodedString where + private mk :: + + /-- + The underlying byte array containing the percent-encoded data. + -/ + toByteArray : ByteArray + + /-- + Proof that all characters in the byte array are valid encoded characters. + -/ + valid : isAllowedEncodedChars toByteArray + +namespace EncodedString + +/-- +Creates an empty encoded string. +-/ +def empty : EncodedString := + ⟨.empty, by native_decide⟩ + +instance : Inhabited EncodedString where + default := EncodedString.empty + +/-- +Appends a single encoded character to an encoded string. +Requires that the character is not '%' to maintain the percent-encoding invariant. +-/ +private def push (s : EncodedString) (c : UInt8) (h : isEncodedChar c) : EncodedString := + ⟨s.toByteArray.push c, isAllowedEncodedChars.push s.valid h⟩ + +/-- +Converts a byte to its percent-encoded hexadecimal representation (%XX). For example, a space +character (0x20) becomes "%20". +-/ +private def byteToHex (b : UInt8) (s : EncodedString) : EncodedString := + let ba := s.toByteArray.push '%'.toUInt8 + |>.push (hexDigit (b >>> 4)) + |>.push (hexDigit (b &&& 0xF)) + let valid := by + have h1 : isEncodedChar '%'.toUInt8 := by decide + have h2 : isEncodedChar (hexDigit (b >>> 4)) := + isHexDigit_isEncodedChar (hexDigit_isHexDigit (BitVec.toNat_ushiftRight_lt b.toBitVec 4 (by decide))) + have h3 : isEncodedChar (hexDigit (b &&& 0xF)) := + isHexDigit_isEncodedChar (hexDigit_isHexDigit (@UInt8.and_lt_add_one b 0xF (by decide))) + exact isAllowedEncodedChars.push (isAllowedEncodedChars.push (isAllowedEncodedChars.push s.valid h1) h2) h3 + ⟨ba, valid⟩ + +/-- +Encodes a raw string into an `EncodedString` with automatic proof construction. Unreserved characters +(alphanumeric, hyphen, period, underscore, tilde) are kept as-is, while all other characters are percent-encoded. +-/ +def encode (s : String) : EncodedString := + s.toUTF8.foldl (init := EncodedString.empty) fun acc c => + if h : isUnreserved c then + acc.push c (by simp [isEncodedChar]; exact Or.inl h) + else + byteToHex c acc + +/-- +Attempts to create an `EncodedString` from a `ByteArray`. Returns `some` if the byte array contains only +valid encoded characters and all percent signs are followed by exactly two hex digits, `none` otherwise. +-/ +def ofByteArray? (ba : ByteArray) : Option EncodedString := + if h : isAllowedEncodedChars ba then + if isValidPercentEncoding ba then some ⟨ba, h⟩ else none + else none + +/-- +Creates an `EncodedString` from a `ByteArray`, panicking if the byte array is invalid. +-/ +def ofByteArray! (ba : ByteArray) : EncodedString := + match ofByteArray? ba with + | some es => es + | none => panic! "invalid encoded string" + +/-- +Creates an `EncodedString` from a `String` by checking if it's already a valid percent-encoded string. +Returns `some` if valid, `none` otherwise. +-/ +def ofString? (s : String) : Option EncodedString := + ofByteArray? s.toUTF8 + +/-- +Creates an `EncodedString` from a `String`, panicking if the string is not a valid percent-encoded string. +-/ +def ofString! (s : String) : EncodedString := + ofByteArray! s.toUTF8 + +/-- +Creates an `EncodedString` from a `ByteArray` with compile-time proofs. +Use this when you have proofs that the byte array is valid. +-/ +def new (ba : ByteArray) (valid : isAllowedEncodedChars ba) (_validEncoding : isValidPercentEncoding ba) : EncodedString := + ⟨ba, valid⟩ + +instance : ToString EncodedString where + toString es := ⟨es.toByteArray, ascii_is_valid_utf8 es.toByteArray (all_of_all_of_imp es.valid isEncodedChar_isAscii)⟩ + +/-- +Decodes an `EncodedString` back to a regular `String`. Converts percent-encoded sequences (e.g., "%20") +back to their original characters. Returns `none` if the decoded bytes are not valid UTF-8. +-/ +def decode (es : EncodedString) : Option String := Id.run do + let mut decoded : ByteArray := ByteArray.empty + let rawBytes := es.toByteArray + let len := rawBytes.size + let mut i := 0 + let percent := '%'.toNat.toUInt8 + while h : i < len do + let c := rawBytes[i] + (decoded, i) := if h₁ : c == percent ∧ i + 1 < len then + let h1 := rawBytes[i + 1] + if let some hd1 := hexDigitToUInt8? h1 then + if h₂ : i + 2 < len then + let h2 := rawBytes[i + 2] + if let some hd2 := hexDigitToUInt8? h2 then + (decoded.push (hd1 * 16 + hd2), i + 3) + else + (((decoded.push c).push h1).push h2, i + 3) + else + ((decoded.push c).push h1, i + 2) + else + ((decoded.push c).push h1, i + 2) + else + (decoded.push c, i + 1) + return String.fromUTF8? decoded + +instance : Repr EncodedString where + reprPrec es := reprPrec (toString es) + +instance : BEq EncodedString where + beq x y := x.toByteArray = y.toByteArray + +instance : Hashable EncodedString where + hash x := Hashable.hash x.toByteArray + +end EncodedString + +/-- +A percent-encoded query string component with a compile-time proof that it contains only valid encoded +query characters. Extends `EncodedString` to support the '+' character for spaces, following the +application/x-www-form-urlencoded format. + +This type is specifically designed for encoding query parameters where spaces can be represented as '+' +instead of "%20". +-/ +structure EncodedQueryString where + private mk :: + + /-- + The underlying byte array containing the percent-encoded query data. + -/ + toByteArray : ByteArray + + /-- + Proof that all characters in the byte array are valid encoded query characters. + -/ + valid : isAllowedEncodedQueryChars toByteArray + +namespace EncodedQueryString + +/-- +Creates an empty encoded query string. +-/ +def empty : EncodedQueryString := + ⟨.empty, by native_decide⟩ + +instance : Inhabited EncodedQueryString where + default := EncodedQueryString.empty + +/-- +Appends a single encoded query character to an encoded query string. +-/ +private def push (s : EncodedQueryString) (c : UInt8) (h : isEncodedQueryChar c) : EncodedQueryString := + ⟨s.toByteArray.push c, isAllowedEncodedQueryChars.push s.valid h⟩ + +/-- +Attempts to create an `EncodedQueryString` from a `ByteArray`. Returns `some` if the byte array contains +only valid encoded query characters and all percent signs are followed by exactly two hex digits, `none` otherwise. +-/ +def ofByteArray? (ba : ByteArray) : Option EncodedQueryString := + if h : isAllowedEncodedQueryChars ba then + if isValidPercentEncoding ba then some ⟨ba, h⟩ else none + else none + +/-- +Creates an `EncodedQueryString` from a `ByteArray`, panicking if the byte array is invalid. +-/ +def ofByteArray! (ba : ByteArray) : EncodedQueryString := + match ofByteArray? ba with + | some es => es + | none => panic! "invalid encoded query string" + +/-- +Creates an `EncodedQueryString` from a `String` by checking if it's already a valid percent-encoded string. +Returns `some` if valid, `none` otherwise. +-/ +def ofString? (s : String) : Option EncodedQueryString := + ofByteArray? s.toUTF8 + +/-- +Creates an `EncodedQueryString` from a `String`, panicking if the string is not a valid percent-encoded string. +-/ +def ofString! (s : String) : EncodedQueryString := + ofByteArray! s.toUTF8 + +/-- +Creates an `EncodedQueryString` from a `ByteArray` with compile-time proofs. +Use this when you have proofs that the byte array is valid. +-/ +def new (ba : ByteArray) (valid : isAllowedEncodedQueryChars ba) (_validEncoding : isValidPercentEncoding ba) : EncodedQueryString := + ⟨ba, valid⟩ + +/-- +Converts a byte to its percent-encoded hexadecimal representation (%XX). For example, a space character +(0x20) becomes "%20". +-/ +private def byteToHex (b : UInt8) (s : EncodedQueryString) : EncodedQueryString := + let ba := s.toByteArray.push '%'.toUInt8 + |>.push (hexDigit (b >>> 4)) + |>.push (hexDigit (b &&& 0xF)) + let valid := by + have h1 : isEncodedQueryChar '%'.toUInt8 := by decide + have h2 : isEncodedQueryChar (hexDigit (b >>> 4)) := + isHexDigit_isEncodedQueryChar (hexDigit_isHexDigit (BitVec.toNat_ushiftRight_lt b.toBitVec 4 (by decide))) + have h3 : isEncodedQueryChar (hexDigit (b &&& 0xF)) := + isHexDigit_isEncodedQueryChar (hexDigit_isHexDigit (@UInt8.and_lt_add_one b 0xF (by decide))) + exact isAllowedEncodedQueryChars.push (isAllowedEncodedQueryChars.push (isAllowedEncodedQueryChars.push s.valid h1) h2) h3 + ⟨ba, valid⟩ + +/-- +Encodes a raw string into an `EncodedQueryString` with automatic proof construction. Unreserved characters +are kept as-is, spaces are encoded as '+', and all other characters are percent-encoded. +-/ +def encode (s : String) : EncodedQueryString := + s.toUTF8.foldl (init := EncodedQueryString.empty) fun acc c => + if h : isUnreserved c then + acc.push c (by simp [isEncodedQueryChar, isEncodedChar]; exact Or.inl (Or.inl h)) + else if _ : c = ' '.toUInt8 then + acc.push '+'.toUInt8 (by simp [isEncodedQueryChar]) + else + byteToHex c acc + +instance : ToString EncodedQueryString where + toString es := ⟨es.toByteArray, ascii_is_valid_utf8 es.toByteArray (all_of_all_of_imp es.valid isEncodedQueryChar_isAscii)⟩ + +/-- +Decodes an `EncodedQueryString` back to a regular `String`. Converts percent-encoded sequences and '+' +signs back to their original characters. Returns `none` if the decoded bytes are not valid UTF-8. + +This is almost the same code from `System.Uri.UriEscape.decodeUri`, but with `Option` instead. +-/ +def decode (es : EncodedQueryString) : Option String := Id.run do + let mut decoded : ByteArray := ByteArray.empty + let rawBytes := es.toByteArray + let len := rawBytes.size + let mut i := 0 + let percent := '%'.toNat.toUInt8 + let plus := '+'.toNat.toUInt8 + while h : i < len do + let c := rawBytes[i] + (decoded, i) := if c == plus then + (decoded.push ' '.toNat.toUInt8, i + 1) + else if h₁ : c == percent ∧ i + 1 < len then + let h1 := rawBytes[i + 1] + if let some hd1 := hexDigitToUInt8? h1 then + if h₂ : i + 2 < len then + let h2 := rawBytes[i + 2] + if let some hd2 := hexDigitToUInt8? h2 then + (decoded.push (hd1 * 16 + hd2), i + 3) + else + (((decoded.push c).push h1).push h2, i + 3) + else + ((decoded.push c).push h1, i + 2) + else + ((decoded.push c).push h1, i + 2) + else + (decoded.push c, i + 1) + return String.fromUTF8? decoded + +end EncodedQueryString + +instance : Repr EncodedQueryString where + reprPrec es := reprPrec (toString es) + +instance : BEq EncodedQueryString where + beq x y := x.toByteArray = y.toByteArray + +instance : Hashable EncodedQueryString where + hash x := Hashable.hash x.toByteArray + +instance : Hashable (Option EncodedQueryString) where + hash + | some x => Hashable.hash ((ByteArray.mk #[1] ++ x.toByteArray)) + | none => Hashable.hash (ByteArray.mk #[0]) + +end Std.Http.URI diff --git a/src/Std/Internal/Http/Data/URI/Parser.lean b/src/Std/Internal/Http/Data/URI/Parser.lean new file mode 100644 index 000000000000..16c0084d84df --- /dev/null +++ b/src/Std/Internal/Http/Data/URI/Parser.lean @@ -0,0 +1,399 @@ +/- +Copyright (c) 2025 Lean FRO, LLC. All rights reserved. +Released under Apache 2.0 license as described in the file LICENSE. +Authors: Sofia Rodrigues +-/ +module + +prelude +public import Init.Data.String +public import Std.Internal.Parsec +public import Std.Internal.Parsec.ByteArray +public import Std.Internal.Http.Data.URI.Basic + +public section + +/-! +# URI Parser + +This module provides parsers for URIs and request targets according to RFC 3986. +It handles parsing of schemes, authorities, paths, queries, and fragments. +-/ + +namespace Std.Http.URI.Parser + +set_option linter.all true + +open Std Internal Parsec ByteArray + +@[inline] +private def isDigit (c : UInt8) : Bool := + c >= '0'.toUInt8 ∧ c <= '9'.toUInt8 + +@[inline] +private def isHexDigit (c : UInt8) : Bool := + isDigit c ∨ (c >= 'A'.toUInt8 ∧ c <= 'F'.toUInt8) ∨ (c >= 'a'.toUInt8 ∧ c <= 'f'.toUInt8) + +@[inline] +private def isAlpha (c : UInt8) : Bool := + (c >= 'A'.toUInt8 ∧ c <= 'Z'.toUInt8) ∨ (c >= 'a'.toUInt8 ∧ c <= 'z'.toUInt8) + +@[inline] +private def isAlphaNum (c : UInt8) : Bool := + isAlpha c ∨ isDigit c + +@[inline] +private def isUnreserved (c : UInt8) : Bool := + isAlphaNum c ∨ c = '-'.toUInt8 ∨ c = '.'.toUInt8 ∨ c = '_'.toUInt8 ∨ c = '~'.toUInt8 + +@[inline] +private def isSubDelims (c : UInt8) : Bool := + c = '!'.toUInt8 ∨ c = '$'.toUInt8 ∨ c = '&'.toUInt8 ∨ c = '\''.toUInt8 ∨ + c = '('.toUInt8 ∨ c = ')'.toUInt8 ∨ c = '*'.toUInt8 ∨ c = '+'.toUInt8 ∨ + c = ','.toUInt8 ∨ c = ';'.toUInt8 ∨ c = '='.toUInt8 + +@[inline] +private def isGenDelims (c : UInt8) : Bool := + c = ':'.toUInt8 ∨ c = '/'.toUInt8 ∨ c = '?'.toUInt8 ∨ c = '#'.toUInt8 ∨ + c = '['.toUInt8 ∨ c = ']'.toUInt8 ∨ c = '@'.toUInt8 + +@[inline] +private def isReserved (c : UInt8) : Bool := + isGenDelims c ∨ isSubDelims c + +@[inline] +private def isPChar (c : UInt8) : Bool := + isUnreserved c ∨ isSubDelims c ∨ c = ':'.toUInt8 ∨ c = '@'.toUInt8 ∨ c = '%'.toUInt8 + +@[inline] +private def isRegNameChar (c : UInt8) : Bool := + isUnreserved c ∨ isSubDelims c ∨ c = '%'.toUInt8 + +@[inline] +private def isSchemeChar (c : UInt8) : Bool := + isAlphaNum c ∨ c = '+'.toUInt8 ∨ c = '-'.toUInt8 ∨ c = '.'.toUInt8 + +@[inline] +private def isQueryChar (c : UInt8) : Bool := + isPChar c ∨ c = '/'.toUInt8 ∨ c = '?'.toUInt8 + +@[inline] +private def isFragmentChar (c : UInt8) : Bool := + isPChar c ∨ c = '/'.toUInt8 ∨ c = '?'.toUInt8 + +@[inline] +private def isUserInfoChar (c : UInt8) : Bool := + isUnreserved c ∨ isSubDelims c ∨ c = '%'.toUInt8 ∨ c = ':'.toUInt8 + +@[inline] +private def tryOpt (p : Parser α) : Parser (Option α) := + optional (attempt p) + +@[inline] +private def ofExcept (p : Except String α) : Parser α := + match p with + | .ok res => pure res + | .error err => fail err + +@[inline] +private def peekIs (p : UInt8 → Bool) : Parser Bool := do + return (← peekWhen? p).isSome + +private def hexToByte (digit : UInt8) : UInt8 := + if digit <= '9'.toUInt8 then digit - '0'.toUInt8 + else if digit <= 'F'.toUInt8 then digit - 'A'.toUInt8 + 10 + else digit - 'a'.toUInt8 + 10 + +private def parsePctEncoded : Parser UInt8 := do + skipByte '%'.toUInt8 + let hi ← hexToByte <$> satisfy isHexDigit + let lo ← hexToByte <$> satisfy isHexDigit + return (hi <<< 4) ||| lo + +-- scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) +private def parseScheme : Parser URI.Scheme := do + let schemeBytes ← takeWhileUpTo1 isSchemeChar 63 + return ⟨String.fromUTF8! schemeBytes.toByteArray |>.toLower, .isLowerCase_toLower⟩ + +-- port = *DIGIT +private def parsePortNumber : Parser UInt16 := do + let portBytes ← takeWhileUpTo isDigit 5 + if portBytes.size = 0 then fail "empty port number" + let portStr := String.fromUTF8! portBytes.toByteArray + + let some portNum := String.toNat? portStr + | fail s!"invalid port number:{portStr}" + + if portNum > 65535 then + fail s!"port number too large: {portNum}" + + return portNum.toUInt16 + +-- userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) +private def parseUserInfo : Parser URI.UserInfo := do + let userBytesName ← takeWhileUpTo (fun x => x ≠ ':'.toUInt8 ∧ isUserInfoChar x) 1024 + + let some userName := URI.EncodedString.ofByteArray? userBytesName.toByteArray + | fail "invalid percent encoding in user info" + + let userPass ← if ← peekIs (· == ':'.toUInt8) then + skip + + let userBytesPass ← takeWhileUpTo isUserInfoChar 1024 + + let some userStrPass := URI.EncodedString.ofByteArray? userBytesPass.toByteArray >>= URI.EncodedString.decode + | fail "invalid percent encoding in user info" + + pure <| some userStrPass + else + pure none + + let some userName := userName.decode + | fail "invalid username" + + return ⟨userName, userPass⟩ + +-- IP-literal = "[" ( IPv6address / IPvFuture ) "]" +private def parseIPv6 : Parser Net.IPv6Addr := do + skipByte '['.toUInt8 + + let result ← takeWhileUpTo1 (fun x => x = ':'.toUInt8 ∨ isHexDigit x) 256 + + skipByte ']'.toUInt8 + + let ipv6Str := String.fromUTF8! result.toByteArray + let some ipv6Addr := Std.Net.IPv6Addr.ofString ipv6Str + | fail s!"invalid IPv6 address: {ipv6Str}" + + return ipv6Addr + +-- IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet +private def parseIPv4 : Parser Net.IPv4Addr := do + let result ← takeWhileUpTo1 (fun x => x = '.'.toUInt8 ∨ isDigit x) 256 + + let ipv4Str := String.fromUTF8! result.toByteArray + let some ipv4Str := Std.Net.IPv4Addr.ofString ipv4Str + | fail s!"invalid IPv4 address: {ipv4Str}" + + return ipv4Str + +-- host = IP-literal / IPv4address / reg-name +private def parseHost : Parser URI.Host := do + if (← peekWhen? (· == '['.toUInt8)).isSome then + return .ipv6 (← parseIPv6) + else if (← peekWhen? isDigit).isSome then + return .ipv4 (← parseIPv4) + else + let isHostName x := isUnreserved x ∨ x = '%'.toUInt8 ∨ isSubDelims x + + let some str := URI.EncodedString.ofByteArray? (← takeWhileUpTo1 isHostName 1024).toByteArray >>= URI.EncodedString.decode + | fail s!"invalid host" + + return .name ⟨str.toLower, .isLowerCase_toLower⟩ + +-- authority = [ userinfo "@" ] host [ ":" port ] +private def parseAuthority : Parser URI.Authority := do + let userinfo ← tryOpt do + let ui ← parseUserInfo + skipByte '@'.toUInt8 + return ui + + let host ← parseHost + + let port ← optional do + skipByte ':'.toUInt8 + parsePortNumber + + return { userInfo := userinfo, host := host, port := port } + +-- segment = *pchar +private def parseSegment : Parser ByteSlice := do + takeWhileUpTo isPChar 256 + +/- +path = path-abempty ; begins with "/" or is empty + / path-absolute ; begins with "/" but not "//" + / path-noscheme ; begins with a non-colon segment + / path-rootless ; begins with a segment + / path-empty ; zero characters + +path-abempty = *( "/" segment ) +path-absolute = "/" [ segment-nz *( "/" segment ) ] +path-noscheme = segment-nz-nc *( "/" segment ) +path-rootless = segment-nz *( "/" segment ) +path-empty = 0 +-/ + +/-- +Parses an URI with combined parsing and validation. +-/ +def parsePath (forceAbsolute : Bool) (allowEmpty : Bool) : Parser URI.Path := do + let mut isAbsolute := false + let mut segments : Array _ := #[] + + let isSegmentOrSlash ← peekIs (fun c => isPChar c ∨ c = '/'.toUInt8) + + if ¬allowEmpty ∧ ((← isEof) ∨ ¬isSegmentOrSlash) then + fail "need a path" + + -- Check if path is absolute + if ← peekIs (· == '/'.toUInt8) then + isAbsolute := true + skip + if ← peekIs (· == '/'.toUInt8) then + fail "it's a scheme starter" + else if forceAbsolute then + if allowEmpty ∧ ((← isEof) ∨ ¬isSegmentOrSlash) then + return { segments := segments, absolute := isAbsolute } + else + fail "require '/' in path" + else + pure () + + -- Parse segments + while (← peek?).isSome do + let segmentBytes ← parseSegment + + let some segmentStr := URI.EncodedString.ofByteArray? segmentBytes.toByteArray + | fail "invalid percent encoding in path segment" + + segments := segments.push segmentStr + + if (← peek?).any (· == '/'.toUInt8) then + skip + -- If path ends with '/', add empty segment + if (← peek?).isNone then + segments := segments.push (URI.EncodedString.empty) + else + break + + return { segments := segments, absolute := isAbsolute } + +-- query = *( pchar / "/" / "?" ) +private def parseQuery : Parser URI.Query := do + let queryBytes ← takeWhileUpTo isQueryChar 1024 + + let some queryStr := String.fromUTF8? queryBytes.toByteArray + | fail "invalid query string" + + let pairs : Option URI.Query := queryStr.splitOn "&" |>.foldlM (init := URI.Query.empty) fun acc pair => do + match pair.splitOn "=" with + | [key] => + let key ← URI.EncodedQueryString.ofString? key + pure (acc.insertEncoded key none) + | key :: value => + let key ← URI.EncodedQueryString.ofString? key + let value ← URI.EncodedQueryString.ofString? (String.intercalate "=" value) + pure (acc.insertEncoded key (some value)) + | [] => pure acc + + if let some pairs := pairs then + return pairs + else + fail "invalid query string" + +-- fragment = *( pchar / "/" / "?" ) +private def parseFragment : Parser URI.EncodedString := do + let fragmentBytes ← takeWhileUpTo isFragmentChar 1024 + + let some fragmentStr := URI.EncodedString.ofByteArray? fragmentBytes.toByteArray + | fail "invalid percent encoding in fragment" + + return fragmentStr + +private def parseHierPart : Parser (Option URI.Authority × URI.Path) := do + -- Check for "//" authority path-abempty + if (← tryOpt (skipString "//")).isSome then + let authority ← parseAuthority + let path ← parsePath true true -- path-abempty (must start with "/" or be empty) + return (some authority, path) + else + -- path-absolute / path-rootless / path-empty + let path ← parsePath false true + return (none, path) + +/-- +Parses a URI (Uniform Resource Identifier). + +URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] +hier-part = "//" authority path-abempty / path-absolute / path-rootless / path-empty +-/ +public def parseURI : Parser URI := do + let scheme ← parseScheme + skipByte ':'.toUInt8 + + let (authority, path) ← parseHierPart + + let query ← optional (skipByteChar '?' *> parseQuery) + let query := query.getD .empty + + let fragment ← optional do + let some result := (← (skipByteChar '#' *> parseFragment)) |>.decode + | fail "invalid fragment parse encoding" + return result + + return { scheme, authority, path, query, fragment } + +/-- +Parses a request target with combined parsing and validation. +-/ +public def parseRequestTarget : Parser RequestTarget := + asterisk <|> origin <|> authority <|> absolute +where + -- The asterisk form + asterisk : Parser RequestTarget := do + skipByte '*'.toUInt8 + return .asteriskForm + + -- origin-form = absolute-path [ "?" query ] + -- absolute-path = 1*( "/" segment ) + origin : Parser RequestTarget := attempt do + if ← peekIs (· == '/'.toUInt8) then + let path ← parsePath true true + let query ← optional (skipByte '?'.toUInt8 *> parseQuery) + let frag ← optional do + let some result := (← (skipByteChar '#' *> parseFragment)) |>.decode + | fail "invalid fragment parse encoding" + return result + + return .originForm path query frag + else + fail "not origin" + + -- absolute-URI = scheme ":" hier-part [ "?" query ] + absolute : Parser RequestTarget := attempt do + let scheme ← parseScheme + skipByte ':'.toUInt8 + let (authority, path) ← parseHierPart + let query ← optional (skipByteChar '?' *> parseQuery) + let query := query.getD URI.Query.empty + let fragment ← optional do + let some result := (← (skipByteChar '#' *> parseFragment)) |>.decode + | fail "invalid fragment parse encoding" + return result + + return .absoluteForm { path, scheme, authority, query, fragment } + + -- authority-form = host ":" port + authority : Parser RequestTarget := attempt do + let host ← parseHost + skipByteChar ':' + let port ← parsePortNumber + return .authorityForm { host, port := some port } + +/-- +Parses an HTTP `Host` header value. +-/ +public def parseHostHeader : Parser (URI.Host × Option UInt16) := do + let host ← parseHost + + let port ← optional do + skipByte ':'.toUInt8 + parsePortNumber + + if ¬(← isEof) then + fail "invalid host header" + + return (host, port) + +end Std.Http.URI.Parser diff --git a/tests/lean/run/async_http_uri.lean b/tests/lean/run/async_http_uri.lean new file mode 100644 index 000000000000..deaae577e3f2 --- /dev/null +++ b/tests/lean/run/async_http_uri.lean @@ -0,0 +1,825 @@ +/- +Copyright (c) 2025 Lean FRO, LLC. All rights reserved. +Released under Apache 2.0 license as described in the file LICENSE. +Authors: Sofia Rodrigues +-/ +import Std.Internal.Http.Data.URI +import Std.Internal.Http.Data.URI.Encoding + +open Std.Http +open Std.Http.URI +open Std.Http.URI.Parser + +/-! +# URI Tests + +Comprehensive tests for URI parsing, encoding, normalization, and manipulation. +This file consolidates tests from multiple URI-related test files. +-/ + +-- ============================================================================ +-- Helper Functions +-- ============================================================================ + +def runParser (parser : Std.Internal.Parsec.ByteArray.Parser α) (s : String) : IO α := + IO.ofExcept ((parser <* Std.Internal.Parsec.eof).run s.toUTF8) + +def parseCheck (s : String) (exact : String := s) : IO Unit := do + let result ← runParser parseRequestTarget s + if toString result = exact then + pure () + else + throw (.userError s!"expect {exact.quote} but got {(toString result).quote}") + +def parseCheckFail (s : String) : IO Unit := do + match (parseRequestTarget <* Std.Internal.Parsec.eof).run s.toUTF8 with + | .ok r => + throw <| .userError + s!"expected parse failure, but succeeded with {(repr r)}" + | .error _ => + pure () + +-- ============================================================================ +-- Percent Encoding Tests (EncodedString) +-- ============================================================================ + +-- Valid percent encoding validation +/-- +info: some "abc" +-/ +#guard_msgs in +#eval IO.println (repr (EncodedString.ofByteArray? "abc".toUTF8)) + +/-- +info: some "%20" +-/ +#guard_msgs in +#eval IO.println (repr (EncodedString.ofByteArray? "%20".toUTF8)) + +/-- +info: some "hello%20world" +-/ +#guard_msgs in +#eval IO.println (repr (EncodedString.ofByteArray? "hello%20world".toUTF8)) + +/-- +info: some "%FF" +-/ +#guard_msgs in +#eval IO.println (repr (EncodedString.ofByteArray? "%FF".toUTF8)) + +/-- +info: some "%00" +-/ +#guard_msgs in +#eval IO.println (repr (EncodedString.ofByteArray? "%00".toUTF8)) + +-- Invalid percent encoding: incomplete +/-- +info: none +-/ +#guard_msgs in +#eval IO.println (repr (EncodedString.ofByteArray? "%".toUTF8)) + +/-- +info: none +-/ +#guard_msgs in +#eval IO.println (repr (EncodedString.ofByteArray? "hello%".toUTF8)) + +/-- +info: none +-/ +#guard_msgs in +#eval IO.println (repr (EncodedString.ofByteArray? "%2".toUTF8)) + +/-- +info: none +-/ +#guard_msgs in +#eval IO.println (repr (EncodedString.ofByteArray? "%A".toUTF8)) + +-- Invalid percent encoding: non-hex characters +/-- +info: none +-/ +#guard_msgs in +#eval IO.println (repr (EncodedString.ofByteArray? "%GG".toUTF8)) + +/-- +info: none +-/ +#guard_msgs in +#eval IO.println (repr (EncodedString.ofByteArray? "%2G".toUTF8)) + +/-- +info: none +-/ +#guard_msgs in +#eval IO.println (repr (EncodedString.ofByteArray? "%G2".toUTF8)) + +-- ============================================================================ +-- Percent Encoding Decode Tests +-- ============================================================================ + +/-- +info: some "abc" +-/ +#guard_msgs in +#eval IO.println (repr <| EncodedString.decode =<< (EncodedString.ofByteArray? "abc".toUTF8)) + +/-- +info: some " " +-/ +#guard_msgs in +#eval IO.println (repr <| EncodedString.decode =<< (EncodedString.ofByteArray? "%20".toUTF8)) + +/-- +info: some "hello world" +-/ +#guard_msgs in +#eval IO.println (repr <| EncodedString.decode =<< (EncodedString.ofByteArray? "hello%20world".toUTF8)) + +/-- +info: some " !" +-/ +#guard_msgs in +#eval IO.println (repr <| EncodedString.decode =<< (EncodedString.ofByteArray? "%20%21".toUTF8)) + +/-- +info: none +-/ +#guard_msgs in +#eval IO.println (repr <| EncodedString.decode =<< (EncodedString.ofByteArray? "%FF".toUTF8)) + +/-- +info: some "\x00" +-/ +#guard_msgs in +#eval IO.println (repr <| EncodedString.decode =<< (EncodedString.ofByteArray? "%00".toUTF8)) + +-- ============================================================================ +-- Query String Encoding Tests +-- ============================================================================ + +/-- +info: some "hello+world" +-/ +#guard_msgs in +#eval IO.println (repr (EncodedQueryString.ofByteArray? "hello+world".toUTF8)) + +/-- +info: none +-/ +#guard_msgs in +#eval IO.println (repr (EncodedQueryString.ofByteArray? "%".toUTF8)) + +/-- +info: some "hello world" +-/ +#guard_msgs in +#eval IO.println (repr <| EncodedQueryString.decode =<< (EncodedQueryString.ofByteArray? "hello+world".toUTF8)) + +/-- +info: some " " +-/ +#guard_msgs in +#eval IO.println (repr <| EncodedQueryString.decode =<< (EncodedQueryString.ofByteArray? "%20".toUTF8)) + +-- ============================================================================ +-- Request Target Parsing - Basic Tests +-- ============================================================================ + +#eval parseCheck "/path/with/encoded%20space" +#eval parseCheck "/path/with/encoded%20space/" +#eval parseCheck "*" +#eval parseCheck "https://ata/b?ata=be#lol%F0%9F%94%A5" +#eval parseCheck "/api/search?q=hello%20world&category=tech%2Bgames" +#eval parseCheck "/" +#eval parseCheck "/api/v1/users/123/posts/456/comments/789" +#eval parseCheck "/files/../etc/passwd" +#eval parseCheck "example.com:8080" +#eval parseCheck "https://example.com:8080/ata" +#eval parseCheck "192.168.1.1:3000" +#eval parseCheck "[::1]:8080" +#eval parseCheck "http://example.com/path/to/resource?query=value" +#eval parseCheck "https://api.example.com:443/v1/users?limit=10" +#eval parseCheck "http://[2001:db8::1]:8080/path" +#eval parseCheck "https://example.com/page#section1" +#eval parseCheck "https://xn--nxasmq6b.xn--o3cw4h/path" +#eval parseCheck "localhost:65535" +#eval parseCheck "https://user:pass@secure.example.com/private" +#eval parseCheck "/double//slash//path" + +-- Parse failure tests +#eval parseCheckFail "/path with space" +#eval parseCheckFail "/path/%" +#eval parseCheckFail "/path/%2" +#eval parseCheckFail "/path/%ZZ" +#eval parseCheckFail "" +#eval parseCheckFail "[::1" +#eval parseCheckFail "[:::1]:80" +#eval parseCheckFail "#frag" +#eval parseCheckFail "/path/\n" +#eval parseCheckFail "/path/\u0000" + +-- ============================================================================ +-- Request Target Parsing - Detailed Output Tests +-- ============================================================================ + +/-- +info: Std.Http.RequestTarget.originForm { segments := #["path", "with", "encoded%20space"], absolute := true } none none +-/ +#guard_msgs in +#eval show IO _ from do + let result ← runParser parseRequestTarget "/path/with/encoded%20space" + IO.println (repr result) + +/-- +info: Std.Http.RequestTarget.asteriskForm +-/ +#guard_msgs in +#eval show IO _ from do + let result ← runParser parseRequestTarget "*" + IO.println (repr result) + +/-- +info: some "lol🔥" +-/ +#guard_msgs in +#eval show IO _ from do + let result ← runParser parseRequestTarget "https://ata/b?ata=be#lol%F0%9F%94%A5" + IO.println (repr (result.fragment?)) + +/-- +info: #[("q", some "hello%20world"), ("category", some "tech%2Bgames")] +-/ +#guard_msgs in +#eval show IO _ from do + let result ← runParser parseRequestTarget "/api/search?q=hello%20world&category=tech%2Bgames" + IO.println (repr result.query) + +/-- +info: Std.Http.RequestTarget.originForm { segments := #[], absolute := true } none none +-/ +#guard_msgs in +#eval show IO _ from do + let result ← runParser parseRequestTarget "/" + IO.println (repr result) + +/-- +info: Std.Http.RequestTarget.authorityForm + { userInfo := none, host := Std.Http.URI.Host.name "example.com", port := some 8080 } +-/ +#guard_msgs in +#eval show IO _ from do + let result ← runParser parseRequestTarget "example.com:8080" + IO.println (repr result) + +/-- +info: Std.Http.RequestTarget.authorityForm { userInfo := none, host := Std.Http.URI.Host.ipv4 192.168.1.1, port := some 3000 } +-/ +#guard_msgs in +#eval show IO _ from do + let result ← runParser parseRequestTarget "192.168.1.1:3000" + IO.println (repr result) + +/-- +info: Std.Http.RequestTarget.authorityForm { userInfo := none, host := Std.Http.URI.Host.ipv6 ::1, port := some 8080 } +-/ +#guard_msgs in +#eval show IO _ from do + let result ← runParser parseRequestTarget "[::1]:8080" + IO.println (repr result) + +/-- +info: Std.Http.RequestTarget.absoluteForm + { scheme := "https", + authority := some { userInfo := none, host := Std.Http.URI.Host.name "example.com", port := some 8080 }, + path := { segments := #["ata"], absolute := true }, + query := #[], + fragment := none } +-/ +#guard_msgs in +#eval show IO _ from do + let result ← runParser parseRequestTarget "https://example.com:8080/ata" + IO.println (repr result) + +/-- +info: Std.Http.RequestTarget.absoluteForm + { scheme := "http", + authority := some { userInfo := none, host := Std.Http.URI.Host.ipv6 2001:db8::1, port := some 8080 }, + path := { segments := #["path"], absolute := true }, + query := #[], + fragment := none } +-/ +#guard_msgs in +#eval show IO _ from do + let result ← runParser parseRequestTarget "http://[2001:db8::1]:8080/path" + IO.println (repr result) + +/-- +info: Std.Http.RequestTarget.absoluteForm + { scheme := "https", + authority := some { userInfo := some { username := "user b", password := some "pass" }, + host := Std.Http.URI.Host.name "secure.example.com", + port := none }, + path := { segments := #["private"], absolute := true }, + query := #[], + fragment := none } +-/ +#guard_msgs in +#eval show IO _ from do + let result ← runParser parseRequestTarget "https://user%20b:pass@secure.example.com/private" + IO.println (repr result) + +-- ============================================================================ +-- IPv6 Host Tests +-- ============================================================================ + +/-- +info: Std.Http.URI.Host.ipv6 ::1 +-/ +#guard_msgs in +#eval show IO _ from do + let result ← runParser parseRequestTarget "[::1]:8080" + match result.authority? with + | some auth => IO.println (repr auth.host) + | none => IO.println "no authority" + +/-- +info: Std.Http.URI.Host.ipv6 2001:db8::8a2e:370:7334 +-/ +#guard_msgs in +#eval show IO _ from do + let result ← runParser parseRequestTarget "http://[2001:db8::8a2e:370:7334]:8080/api" + match result.authority? with + | some auth => IO.println (repr auth.host) + | none => IO.println "no authority" + +/-- +info: Std.Http.URI.Host.ipv6 :: +-/ +#guard_msgs in +#eval show IO _ from do + let result ← runParser parseRequestTarget "http://[::]/path" + match result.authority? with + | some auth => IO.println (repr auth.host) + | none => IO.println "no authority" + +-- ============================================================================ +-- UserInfo Tests +-- ============================================================================ + +/-- +info: some { username := "user", password := some "pass" } +-/ +#guard_msgs in +#eval show IO _ from do + let result ← runParser parseRequestTarget "https://user:pass@example.com/private" + match result.authority? with + | some auth => IO.println (repr auth.userInfo) + | none => IO.println "no authority" + +/-- +info: some { username := "user only", password := none } +-/ +#guard_msgs in +#eval show IO _ from do + let result ← runParser parseRequestTarget "https://user%20only@example.com/path" + match result.authority? with + | some auth => IO.println (repr auth.userInfo) + | none => IO.println "no authority" + +/-- +info: some { username := "", password := some "pass" } +-/ +#guard_msgs in +#eval show IO _ from do + let result ← runParser parseRequestTarget "https://:pass@example.com/path" + match result.authority? with + | some auth => IO.println (repr auth.userInfo) + | none => IO.println "no authority" + +/-- +info: some { username := "user", password := some "p@ss:w0rd" } +-/ +#guard_msgs in +#eval show IO _ from do + let result ← runParser parseRequestTarget "https://user:p%40ss%3Aw0rd@example.com/" + match result.authority? with + | some auth => IO.println (repr auth.userInfo) + | none => IO.println "no authority" + +-- ============================================================================ +-- Path.normalize Tests (RFC 3986 Section 5.2.4) +-- ============================================================================ + +/-- +info: /a/b +-/ +#guard_msgs in +#eval IO.println <| toString (URI.parse! "http://example.com/a/./b").path.normalize + +/-- +info: /a +-/ +#guard_msgs in +#eval IO.println (URI.parse! "http://example.com/a/b/..").path.normalize + +/-- +info: /a/g +-/ +#guard_msgs in +#eval IO.println (URI.parse! "http://example.com/a/b/c/./../../g").path.normalize + +/-- +info: /g +-/ +#guard_msgs in +#eval IO.println (URI.parse! "http://example.com/../../../g").path.normalize + +/-- +info: /a/c +-/ +#guard_msgs in +#eval IO.println (URI.parse! "http://example.com/a/b/../c").path.normalize + +/-- +info: /a/ +-/ +#guard_msgs in +#eval IO.println (URI.parse! "http://example.com/a/b/c/../.././").path.normalize + +/-- +info: / +-/ +#guard_msgs in +#eval IO.println (URI.parse! "http://example.com/a/b/../../..").path.normalize + +/-- +info: / +-/ +#guard_msgs in +#eval IO.println (URI.parse! "http://example.com/../../../").path.normalize + +/-- +info: /a/b/c +-/ +#guard_msgs in +#eval IO.println (URI.parse! "http://example.com/./a/./b/./c/.").path.normalize + +/-- +info: /c +-/ +#guard_msgs in +#eval IO.println (URI.parse! "http://example.com/a/../b/../c").path.normalize + +-- ============================================================================ +-- Path.parent Tests +-- ============================================================================ + +/-- +info: /a/b +-/ +#guard_msgs in +#eval IO.println (URI.parse! "http://example.com/a/b/c").path.parent + +/-- +info: /a +-/ +#guard_msgs in +#eval IO.println (URI.parse! "http://example.com/a/b").path.parent + +/-- +info: / +-/ +#guard_msgs in +#eval IO.println (URI.parse! "http://example.com/a").path.parent + +/-- +info: / +-/ +#guard_msgs in +#eval IO.println (URI.parse! "http://example.com/").path.parent + +-- ============================================================================ +-- Path.join Tests +-- ============================================================================ + +/-- +info: /a/b/c/d +-/ +#guard_msgs in +#eval do + let p1 := (URI.parse! "http://example.com/a/b").path + let p2 : URI.Path := { segments := #[URI.EncodedString.encode "c", URI.EncodedString.encode "d"], absolute := false } + IO.println (p1.join p2) + +/-- +info: /x/y +-/ +#guard_msgs in +#eval do + let p1 := (URI.parse! "http://example.com/a/b").path + let p2 : URI.Path := { segments := #[URI.EncodedString.encode "x", URI.EncodedString.encode "y"], absolute := true } + IO.println (p1.join p2) + +-- ============================================================================ +-- Path.isEmpty Tests +-- ============================================================================ + +#guard (URI.parse! "http://example.com").path.isEmpty = true +#guard (URI.parse! "http://example.com/").path.absolute = true +#guard (URI.parse! "http://example.com/a").path.isEmpty = false +#guard (URI.parse! "http://example.com/a").path.absolute = true + +-- ============================================================================ +-- URI Modification Helpers +-- ============================================================================ + +#guard ((URI.parse! "http://example.com").withScheme "htTps" |>.scheme) == "https" +#guard ((URI.parse! "http://example.com").withScheme "ftP" |>.scheme) == "ftp" + +/-- +info: http://example.com/#section1 +-/ +#guard_msgs in +#eval IO.println ((URI.parse! "http://example.com/").withFragment (some (toString <| URI.EncodedString.encode "section1"))) + +/-- +info: http://example.com/?key=value +-/ +#guard_msgs in +#eval do + let uri := URI.parse! "http://example.com/" + let query := URI.Query.empty.insert "key" "value" + IO.println (uri.withQuery query) + +/-- +info: http://example.com/new/path +-/ +#guard_msgs in +#eval do + let uri := URI.parse! "http://example.com/old/path" + let newPath : URI.Path := { segments := #[URI.EncodedString.encode "new", URI.EncodedString.encode "path"], absolute := true } + IO.println (uri.withPath newPath) + +-- ============================================================================ +-- URI.normalize Tests (RFC 3986 Section 6) +-- ============================================================================ + +#guard (URI.parse! "HTTP://example.com").normalize.scheme == "http" +#guard (URI.parse! "HtTpS://example.com").normalize.scheme == "https" + +/-- +info: http://example.com/ +-/ +#guard_msgs in +#eval IO.println (URI.parse! "http://EXAMPLE.COM/").normalize + +/-- +info: http://example.com/ +-/ +#guard_msgs in +#eval IO.println (URI.parse! "HTTP://Example.COM/").normalize + +/-- +info: http://example.com/a/c +-/ +#guard_msgs in +#eval IO.println (URI.parse! "http://example.com/a/b/../c").normalize + +/-- +info: http://example.com/a/g +-/ +#guard_msgs in +#eval IO.println (URI.parse! "HTTP://EXAMPLE.COM/a/b/c/./../../g").normalize + +/-- +info: https://www.example.com/PATH +-/ +#guard_msgs in +#eval IO.println (URI.parse! "HTTPS://WWW.EXAMPLE.COM/PATH").normalize + +-- ============================================================================ +-- Query Parameter Tests +-- ============================================================================ + +-- Query with duplicate keys +/-- +info: 3 +-/ +#guard_msgs in +#eval show IO _ from do + let result ← runParser parseRequestTarget "/search?tag=a&tag=b&tag=c" + let all := result.query.findAll "tag" + IO.println all.size + +/-- +info: #[some "a", some "b", some "c"] +-/ +#guard_msgs in +#eval show IO _ from do + let result ← runParser parseRequestTarget "/search?tag=a&tag=b&tag=c" + let all := result.query.findAll "tag" + IO.println (repr all) + +/-- +info: some (some "a") +-/ +#guard_msgs in +#eval show IO _ from do + let result ← runParser parseRequestTarget "/search?key=a&key=b&key=c" + IO.println (repr (result.query.find? "key")) + +-- Empty value vs no value +/-- +info: some (some "") +-/ +#guard_msgs in +#eval show IO _ from do + let result ← runParser parseRequestTarget "/api?key=" + IO.println (repr (result.query.find? "key")) + +/-- +info: some none +-/ +#guard_msgs in +#eval show IO _ from do + let result ← runParser parseRequestTarget "/api?key" + IO.println (repr (result.query.find? "key")) + +/-- +info: some (some "value") +-/ +#guard_msgs in +#eval show IO _ from do + let result ← runParser parseRequestTarget "/api?key=value" + IO.println (repr (result.query.find? "key")) + +-- ============================================================================ +-- Query Operations +-- ============================================================================ + +#guard (URI.Query.empty.insert "a" "1" |>.contains "a") = true +#guard (URI.Query.empty.contains "nonexistent") = false + +/-- +info: a=1&b=2 +-/ +#guard_msgs in +#eval do + let query := URI.Query.empty + |>.insert "a" "1" + |>.insert "b" "2" + IO.println query.toRawString + +/-- +info: b=2 +-/ +#guard_msgs in +#eval do + let query := URI.Query.empty + |>.insert "a" "1" + |>.insert "b" "2" + |>.erase "a" + IO.println query.toRawString + +/-- +info: key=new +-/ +#guard_msgs in +#eval do + let query := URI.Query.empty + |>.insert "key" "old" + |>.set "key" "new" + IO.println query.toRawString + +-- ============================================================================ +-- Fragment Tests +-- ============================================================================ + +/-- +info: some "section/subsection" +-/ +#guard_msgs in +#eval show IO _ from do + let result ← runParser parseRequestTarget "https://example.com/page#section%2Fsubsection" + IO.println (repr result.fragment?) + +/-- +info: some "heading with spaces" +-/ +#guard_msgs in +#eval show IO _ from do + let result ← runParser parseRequestTarget "https://example.com/page#heading%20with%20spaces" + IO.println (repr result.fragment?) + +/-- +info: none +-/ +#guard_msgs in +#eval show IO _ from do + let result ← runParser parseRequestTarget "/path" + IO.println (repr result.fragment?) + +/-- +info: some "" +-/ +#guard_msgs in +#eval show IO _ from do + let result ← runParser parseRequestTarget "/path#" + IO.println (repr result.fragment?) + +-- ============================================================================ +-- URI Builder Tests +-- ============================================================================ + +/-- +info: https://example.com/api/users?page=1 +-/ +#guard_msgs in +#eval do + let uri := URI.Builder.empty + |>.setScheme "https" + |>.setHost "example.com" + |>.appendPathSegment "api" + |>.appendPathSegment "users" + |>.addQueryParam "page" "1" + |>.build + IO.println uri + +/-- +info: http://localhost:8080/ +-/ +#guard_msgs in +#eval do + let uri := URI.Builder.empty + |>.setScheme "http" + |>.setHost "localhost" + |>.setPort 8080 + |>.build + IO.println uri + +/-- +info: https://user:pass@secure.example.com/private +-/ +#guard_msgs in +#eval do + let uri := URI.Builder.empty + |>.setScheme "https" + |>.setUserInfo "user" (some "pass") + |>.setHost "secure.example.com" + |>.appendPathSegment "private" + |>.build + IO.println uri + +-- ============================================================================ +-- Encoded Path Segment Tests +-- ============================================================================ + +/-- +info: Std.Http.RequestTarget.originForm { segments := #["path%2Fwith%2Fslashes"], absolute := true } none none +-/ +#guard_msgs in +#eval show IO _ from do + let result ← runParser parseRequestTarget "/path%2Fwith%2Fslashes" + IO.println (repr result) + +/-- +info: Std.Http.RequestTarget.originForm { segments := #["file%20name.txt"], absolute := true } none none +-/ +#guard_msgs in +#eval show IO _ from do + let result ← runParser parseRequestTarget "/file%20name.txt" + IO.println (repr result) + +/-- +info: Std.Http.RequestTarget.originForm { segments := #["caf%C3%A9"], absolute := true } none none +-/ +#guard_msgs in +#eval show IO _ from do + let result ← runParser parseRequestTarget "/caf%C3%A9" + IO.println (repr result) + +-- ============================================================================ +-- Authority Form Tests +-- ============================================================================ + +/-- +info: Std.Http.RequestTarget.authorityForm + { userInfo := none, host := Std.Http.URI.Host.name "proxy.example.com", port := some 3128 } +-/ +#guard_msgs in +#eval show IO _ from do + let result ← runParser parseRequestTarget "proxy.example.com:3128" + IO.println (repr result) + +/-- +info: Std.Http.RequestTarget.authorityForm { userInfo := none, host := Std.Http.URI.Host.ipv4 127.0.0.1, port := some 8080 } +-/ +#guard_msgs in +#eval show IO _ from do + let result ← runParser parseRequestTarget "127.0.0.1:8080" + IO.println (repr result) From 21286eb16312f3b12d8db706948b05728520f4bc Mon Sep 17 00:00:00 2001 From: Sofia Rodrigues Date: Fri, 23 Jan 2026 21:11:48 -0300 Subject: [PATCH 02/30] fix: domain name comment --- src/Std/Internal/Http/Data/URI/Basic.lean | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Std/Internal/Http/Data/URI/Basic.lean b/src/Std/Internal/Http/Data/URI/Basic.lean index 57a5c87f3d29..86215d776dcf 100644 --- a/src/Std/Internal/Http/Data/URI/Basic.lean +++ b/src/Std/Internal/Http/Data/URI/Basic.lean @@ -55,7 +55,7 @@ structure UserInfo where deriving Inhabited, Repr /-- -? +A domain name represented as a lowercase-normalized string. -/ abbrev DomainName := { s : String // IsLowerCase s } From bad70e3eab4b716c11ffe51daf2eeb643b7d28d3 Mon Sep 17 00:00:00 2001 From: Sofia Rodrigues Date: Fri, 23 Jan 2026 22:24:29 -0300 Subject: [PATCH 03/30] feat: request type has request target --- src/Std/Internal/Http/Data/Request.lean | 36 ++++++++++++++--------- src/Std/Internal/Http/Data/URI/Basic.lean | 3 ++ tests/lean/run/async_http_encode.lean | 6 ++-- 3 files changed, 28 insertions(+), 17 deletions(-) diff --git a/src/Std/Internal/Http/Data/Request.lean b/src/Std/Internal/Http/Data/Request.lean index b43f289ae201..d4db6a112bfd 100644 --- a/src/Std/Internal/Http/Data/Request.lean +++ b/src/Std/Internal/Http/Data/Request.lean @@ -9,6 +9,7 @@ prelude public import Std.Internal.Http.Data.Method public import Std.Internal.Http.Data.Version public import Std.Internal.Http.Data.Headers +public import Std.Internal.Http.Data.URI public section @@ -40,7 +41,7 @@ structure Request.Head where /-- The request target/URI indicating the resource being requested -/ - uri : String := "" + uri : RequestTarget := .asteriskForm /-- Collection of HTTP headers for the request (Content-Type, Authorization, etc.) @@ -88,7 +89,7 @@ instance : Encode .v11 Head where encode buffer req := let buffer := Encode.encode (v := .v11) buffer req.method let buffer := buffer.writeChar ' ' - let buffer := buffer.writeString req.uri + let buffer := Encode.encode (v := .v11) buffer req.uri let buffer := buffer.writeChar ' ' let buffer := Encode.encode (v := .v11) buffer req.version let buffer := buffer.writeString "\r\n" @@ -124,9 +125,16 @@ def version (builder : Builder) (version : Version) : Builder := /-- Sets the request target/URI for the request being built -/ -def uri (builder : Builder) (uri : String) : Builder := +def uri (builder : Builder) (uri : RequestTarget) : Builder := { builder with head := { builder.head with uri := uri } } +/-- +Sets the request target/URI for the request being built +-/ +def uri! (builder : Builder) (uri : String) : Builder := + let uri := RequestTarget.parse! uri + { builder with head := { builder.head with uri } } + /-- Sets the headers for the request being built -/ @@ -166,7 +174,7 @@ end Builder /-- Creates a new HTTP GET Request with the specified URI -/ -def get (uri : String) : Builder := +def get (uri : RequestTarget) : Builder := new |>.method .get |>.uri uri @@ -174,7 +182,7 @@ def get (uri : String) : Builder := /-- Creates a new HTTP POST Request builder with the specified URI. -/ -def post (uri : String) : Builder := +def post (uri : RequestTarget) : Builder := new |>.method .post |>.uri uri @@ -182,7 +190,7 @@ def post (uri : String) : Builder := /-- Creates a new HTTP PUT Request builder with the specified URI. -/ -def put (uri : String) : Builder := +def put (uri : RequestTarget) : Builder := new |>.method .put |>.uri uri @@ -190,7 +198,7 @@ def put (uri : String) : Builder := /-- Creates a new HTTP DELETE Request builder with the specified URI -/ -def delete (uri : String) : Builder := +def delete (uri : RequestTarget) : Builder := new |>.method .delete |>.uri uri @@ -198,7 +206,7 @@ def delete (uri : String) : Builder := /-- Creates a new HTTP PATCH Request builder with the specified URI -/ -def patch (uri : String) : Builder := +def patch (uri : RequestTarget) : Builder := new |>.method .patch |>.uri uri @@ -207,25 +215,25 @@ def patch (uri : String) : Builder := Creates a new HTTP HEAD Request builder with the specified URI. Named `head'` to avoid conflict with the `head` field accessor. -/ -def head' (uri : String) : Builder := +def head' (uri : RequestTarget) : Builder := new |>.method .head |>.uri uri /-- Creates a new HTTP OPTIONS Request builder with the specified URI. -Use `Request.options (String.asteriskForm)` for server-wide OPTIONS. +Use `Request.options (RequestTarget.asteriskForm)` for server-wide OPTIONS. -/ -def options (uri : String) : Builder := +def options (uri : RequestTarget) : Builder := new |>.method .options |>.uri uri /-- Creates a new HTTP CONNECT Request builder with the specified URI. -Typically used with `String.authorityForm` for tunneling. +Typically used with `RequestTarget.authorityForm` for tunneling. -/ -def connect (uri : String) : Builder := +def connect (uri : RequestTarget) : Builder := new |>.method .connect |>.uri uri @@ -233,7 +241,7 @@ def connect (uri : String) : Builder := /-- Creates a new HTTP TRACE Request builder with the specified URI -/ -def trace (uri : String) : Builder := +def trace (uri : RequestTarget) : Builder := new |>.method .trace |>.uri uri diff --git a/src/Std/Internal/Http/Data/URI/Basic.lean b/src/Std/Internal/Http/Data/URI/Basic.lean index 86215d776dcf..91a5b70b6599 100644 --- a/src/Std/Internal/Http/Data/URI/Basic.lean +++ b/src/Std/Internal/Http/Data/URI/Basic.lean @@ -726,4 +726,7 @@ instance : ToString RequestTarget where | .authorityForm auth => toString auth | .asteriskForm => "*" +instance : Encode .v11 RequestTarget where + encode buffer target := buffer.writeString (toString target) + end Std.Http.RequestTarget diff --git a/tests/lean/run/async_http_encode.lean b/tests/lean/run/async_http_encode.lean index 8222e48b7d58..6c9fb87c4431 100644 --- a/tests/lean/run/async_http_encode.lean +++ b/tests/lean/run/async_http_encode.lean @@ -132,13 +132,13 @@ info: "X-Custom-Header: value\x0d\n" info: "GET /path HTTP/1.1\x0d\n\x0d\n" -/ #guard_msgs in -#eval encodeStr ({ method := .get, version := .v11, uri := "/path" } : Request.Head) +#eval encodeStr ({ method := .get, version := .v11, uri := .parse! "/path" } : Request.Head) /-- info: "POST /submit HTTP/1.1\x0d\n\x0d\n" -/ #guard_msgs in -#eval encodeStr ({ method := .post, version := .v11, uri := "/submit" } : Request.Head) +#eval encodeStr ({ method := .post, version := .v11, uri := .parse! "/submit" } : Request.Head) /-- info: "PUT /resource HTTP/2.0\x0d\nContent-Type: application/json\x0d\n\x0d\n" @@ -147,7 +147,7 @@ info: "PUT /resource HTTP/2.0\x0d\nContent-Type: application/json\x0d\n\x0d\n" #eval encodeStr ({ method := .put version := .v20 - uri := "/resource" + uri := .parse! "/resource" headers := Headers.empty.insert! "content-type" "application/json" } : Request.Head) From 03843fd3f09226c383e73edcfed8fe2807eef7ab Mon Sep 17 00:00:00 2001 From: Sofia Rodrigues Date: Mon, 9 Feb 2026 21:09:38 -0300 Subject: [PATCH 04/30] fix: suggestions --- src/Std/Internal/Http/Data/URI.lean | 2 +- src/Std/Internal/Http/Data/URI/Basic.lean | 61 ++++++++++++++++------ src/Std/Internal/Http/Data/URI/Parser.lean | 25 +++++---- tests/lean/run/async_http_uri.lean | 60 +++++++++++++++------ 4 files changed, 106 insertions(+), 42 deletions(-) diff --git a/src/Std/Internal/Http/Data/URI.lean b/src/Std/Internal/Http/Data/URI.lean index 09a808ea3f2b..939a833107e5 100644 --- a/src/Std/Internal/Http/Data/URI.lean +++ b/src/Std/Internal/Http/Data/URI.lean @@ -48,7 +48,7 @@ Panics if the string is not a valid origin-form request target. @[inline] def originForm! (path : String) : RequestTarget := match parse? path with - | some (.originForm p q f) => .originForm p q f + | some (.originForm p q) => .originForm p q | _ => panic! s!"invalid origin-form request target: {path}" end RequestTarget diff --git a/src/Std/Internal/Http/Data/URI/Basic.lean b/src/Std/Internal/Http/Data/URI/Basic.lean index 91a5b70b6599..4a400a55cdf8 100644 --- a/src/Std/Internal/Http/Data/URI/Basic.lean +++ b/src/Std/Internal/Http/Data/URI/Basic.lean @@ -55,9 +55,24 @@ structure UserInfo where deriving Inhabited, Repr /-- -A domain name represented as a lowercase-normalized string. +Checks if a character is valid for use in a domain name. +Valid characters are ASCII alphanumeric, hyphens, and dots. -/ -abbrev DomainName := { s : String // IsLowerCase s } +def isValidDomainNameChar (c : Char) : Bool := + c.isAlphanum || c == '-' || c == '.' + +/-- +Proposition that asserts all characters in a string are valid domain name characters. +-/ +abbrev IsValidDomainName (s : String) : Prop := + s.toList.all isValidDomainNameChar + +/-- +A domain name represented as a validated, lowercase-normalized string. +Only ASCII alphanumeric characters, hyphens, and dots are allowed. +Internationalized domain names must be converted to punycode before use. +-/ +abbrev DomainName := { s : String // IsLowerCase s ∧ IsValidDomainName s } /-- Host component of a URI, supporting domain names and IP addresses. @@ -129,8 +144,8 @@ instance : ToString Authority where toString auth := let userPart := match auth.userInfo with | none => "" - | some ⟨name, some pass⟩ => s!"{toString name}:{toString pass}@" - | some ⟨name, none⟩ => s!"{toString name}@" + | some ⟨name, some pass⟩ => s!"{toString (EncodedString.encode name)}:{toString (EncodedString.encode pass)}@" + | some ⟨name, none⟩ => s!"{toString (EncodedString.encode name)}@" let hostPart := toString auth.host let portPart := match auth.port with | none => "" @@ -495,11 +510,28 @@ def setUserInfo (b : Builder) (username : String) (password : Option String := n } /-- -Sets the host as a domain name. -The domain name will be automatically percent-encoded. +Sets the host as a domain name, returning `none` if the name contains invalid characters. +The domain name will be automatically lowercased. +Only ASCII alphanumeric characters, hyphens, and dots are allowed. +Internationalized domain names must be converted to punycode before use. +-/ +def setHost? (b : Builder) (name : String) : Option Builder := + let lower := name.toLower + if h : IsValidDomainName lower then + some { b with host := some (Host.name ⟨lower, IsLowerCase.isLowerCase_toLower, h⟩) } + else + none + +/-- +Sets the host as a domain name, panicking if the name contains invalid characters. +The domain name will be automatically lowercased. +Only ASCII alphanumeric characters, hyphens, and dots are allowed. +Internationalized domain names must be converted to punycode before use. -/ -def setHost (b : Builder) (name : String) : Builder := - { b with host := some (Host.name ⟨name.toLower, IsLowerCase.isLowerCase_toLower⟩) } +def setHost! (b : Builder) (name : String) : Builder := + match b.setHost? name with + | some b => b + | none => panic! s!"invalid domain name: {name.quote}" /-- Sets the host as an IPv4 address. @@ -651,7 +683,7 @@ inductive RequestTarget where and optional fragment. Example: `/path/to/resource?key=value#section` -/ - | originForm (path : URI.Path) (query : Option URI.Query) (fragment : Option String) + | originForm (path : URI.Path) (query : Option URI.Query) /-- Absolute-form request target containing a complete URI. Used when making requests through a proxy. @@ -679,7 +711,7 @@ Extracts the path component from a request target, if available. Returns an empty relative path for targets without a path. -/ def path : RequestTarget → URI.Path - | .originForm p _ _ => p + | .originForm p _ => p | .absoluteForm u => u.path | _ => { segments := #[], absolute := false } @@ -688,7 +720,7 @@ Extracts the query component from a request target, if available. Returns an empty array for targets without a query. -/ def query : RequestTarget → URI.Query - | .originForm _ q _ => q.getD URI.Query.empty + | .originForm _ q => q.getD URI.Query.empty | .absoluteForm u => u.query | _ => URI.Query.empty @@ -704,7 +736,7 @@ def authority? : RequestTarget → Option URI.Authority Extracts the fragment component from a request target, if available. -/ def fragment? : RequestTarget → Option String - | .originForm _ _ frag => frag + | .originForm _ _ => none | .absoluteForm u => u.fragment | _ => none @@ -717,11 +749,10 @@ def uri? : RequestTarget → Option URI instance : ToString RequestTarget where toString - | .originForm path query frag => + | .originForm path query => let pathStr := toString path let queryStr := query.map toString |>.getD "" - let frag := frag.map (fun f => "#" ++ toString (URI.EncodedString.encode f)) |>.getD "" - s!"{pathStr}{queryStr}{frag}" + s!"{pathStr}{queryStr}" | .absoluteForm uri => toString uri | .authorityForm auth => toString auth | .asteriskForm => "*" diff --git a/src/Std/Internal/Http/Data/URI/Parser.lean b/src/Std/Internal/Http/Data/URI/Parser.lean index 16c0084d84df..68a92b3df539 100644 --- a/src/Std/Internal/Http/Data/URI/Parser.lean +++ b/src/Std/Internal/Http/Data/URI/Parser.lean @@ -178,18 +178,27 @@ private def parseIPv4 : Parser Net.IPv4Addr := do return ipv4Str -- host = IP-literal / IPv4address / reg-name +-- Note: RFC 1123 allows domain labels to start with digits, so we must try IPv4 +-- first and fall back to reg-name parsing if it fails. private def parseHost : Parser URI.Host := do if (← peekWhen? (· == '['.toUInt8)).isSome then return .ipv6 (← parseIPv6) - else if (← peekWhen? isDigit).isSome then - return .ipv4 (← parseIPv4) else - let isHostName x := isUnreserved x ∨ x = '%'.toUInt8 ∨ isSubDelims x + if (← peekWhen? isDigit).isSome then + if let some ipv4 ← tryOpt parseIPv4 then + return .ipv4 ipv4 - let some str := URI.EncodedString.ofByteArray? (← takeWhileUpTo1 isHostName 1024).toByteArray >>= URI.EncodedString.decode + -- It needs to be a legal DNS label, so it differs from reg-name. + let isHostName x := isAlphaNum x ∨ x = '-'.toUInt8 ∨ x = '.'.toUInt8 + + let some str := String.fromUTF8? (← takeWhileUpTo1 isHostName 1024).toByteArray | fail s!"invalid host" - return .name ⟨str.toLower, .isLowerCase_toLower⟩ + let lower := str.toLower + if h : URI.IsValidDomainName lower then + return .name ⟨lower, .isLowerCase_toLower, h⟩ + else + fail s!"invalid domain name: {str}" -- authority = [ userinfo "@" ] host [ ":" port ] private def parseAuthority : Parser URI.Authority := do @@ -351,12 +360,8 @@ where if ← peekIs (· == '/'.toUInt8) then let path ← parsePath true true let query ← optional (skipByte '?'.toUInt8 *> parseQuery) - let frag ← optional do - let some result := (← (skipByteChar '#' *> parseFragment)) |>.decode - | fail "invalid fragment parse encoding" - return result - return .originForm path query frag + return .originForm path query else fail "not origin" diff --git a/tests/lean/run/async_http_uri.lean b/tests/lean/run/async_http_uri.lean index deaae577e3f2..2f7cb6944f24 100644 --- a/tests/lean/run/async_http_uri.lean +++ b/tests/lean/run/async_http_uri.lean @@ -210,6 +210,7 @@ info: some " " #eval parseCheck "localhost:65535" #eval parseCheck "https://user:pass@secure.example.com/private" #eval parseCheck "/double//slash//path" +#eval parseCheck "http://user%40example:pass%3Aword@host.com" -- Parse failure tests #eval parseCheckFail "/path with space" @@ -228,7 +229,7 @@ info: some " " -- ============================================================================ /-- -info: Std.Http.RequestTarget.originForm { segments := #["path", "with", "encoded%20space"], absolute := true } none none +info: Std.Http.RequestTarget.originForm { segments := #["path", "with", "encoded%20space"], absolute := true } none -/ #guard_msgs in #eval show IO _ from do @@ -260,7 +261,7 @@ info: #[("q", some "hello%20world"), ("category", some "tech%2Bgames")] IO.println (repr result.query) /-- -info: Std.Http.RequestTarget.originForm { segments := #[], absolute := true } none none +info: Std.Http.RequestTarget.originForm { segments := #[], absolute := true } none -/ #guard_msgs in #eval show IO _ from do @@ -724,14 +725,6 @@ info: none let result ← runParser parseRequestTarget "/path" IO.println (repr result.fragment?) -/-- -info: some "" --/ -#guard_msgs in -#eval show IO _ from do - let result ← runParser parseRequestTarget "/path#" - IO.println (repr result.fragment?) - -- ============================================================================ -- URI Builder Tests -- ============================================================================ @@ -743,7 +736,7 @@ info: https://example.com/api/users?page=1 #eval do let uri := URI.Builder.empty |>.setScheme "https" - |>.setHost "example.com" + |>.setHost! "example.com" |>.appendPathSegment "api" |>.appendPathSegment "users" |>.addQueryParam "page" "1" @@ -757,7 +750,7 @@ info: http://localhost:8080/ #eval do let uri := URI.Builder.empty |>.setScheme "http" - |>.setHost "localhost" + |>.setHost! "localhost" |>.setPort 8080 |>.build IO.println uri @@ -770,7 +763,7 @@ info: https://user:pass@secure.example.com/private let uri := URI.Builder.empty |>.setScheme "https" |>.setUserInfo "user" (some "pass") - |>.setHost "secure.example.com" + |>.setHost! "secure.example.com" |>.appendPathSegment "private" |>.build IO.println uri @@ -780,7 +773,7 @@ info: https://user:pass@secure.example.com/private -- ============================================================================ /-- -info: Std.Http.RequestTarget.originForm { segments := #["path%2Fwith%2Fslashes"], absolute := true } none none +info: Std.Http.RequestTarget.originForm { segments := #["path%2Fwith%2Fslashes"], absolute := true } none -/ #guard_msgs in #eval show IO _ from do @@ -788,7 +781,7 @@ info: Std.Http.RequestTarget.originForm { segments := #["path%2Fwith%2Fslashes"] IO.println (repr result) /-- -info: Std.Http.RequestTarget.originForm { segments := #["file%20name.txt"], absolute := true } none none +info: Std.Http.RequestTarget.originForm { segments := #["file%20name.txt"], absolute := true } none -/ #guard_msgs in #eval show IO _ from do @@ -796,7 +789,7 @@ info: Std.Http.RequestTarget.originForm { segments := #["file%20name.txt"], abso IO.println (repr result) /-- -info: Std.Http.RequestTarget.originForm { segments := #["caf%C3%A9"], absolute := true } none none +info: Std.Http.RequestTarget.originForm { segments := #["caf%C3%A9"], absolute := true } none -/ #guard_msgs in #eval show IO _ from do @@ -823,3 +816,38 @@ info: Std.Http.RequestTarget.authorityForm { userInfo := none, host := Std.Http. #eval show IO _ from do let result ← runParser parseRequestTarget "127.0.0.1:8080" IO.println (repr result) + +/-- +info: Std.Http.RequestTarget.authorityForm + { userInfo := none, host := Std.Http.URI.Host.name "1example.com", port := some 8080 } +-/ +#guard_msgs in +#eval show IO _ from do + let result ← runParser parseRequestTarget "1example.com:8080" + IO.println (repr result) + +/-- +info: Std.Http.RequestTarget.absoluteForm + { scheme := "http", + authority := some { userInfo := none, host := Std.Http.URI.Host.name "1example.com", port := none }, + path := { segments := #["path"], absolute := true }, + query := #[], + fragment := none } +-/ +#guard_msgs in +#eval show IO _ from do + let result ← runParser parseRequestTarget "http://1example.com/path" + IO.println (repr result) + +/-- +info: Std.Http.RequestTarget.absoluteForm + { scheme := "http", + authority := some { userInfo := none, host := Std.Http.URI.Host.name "123abc.example.com", port := none }, + path := { segments := #["page"], absolute := true }, + query := #[], + fragment := none } +-/ +#guard_msgs in +#eval show IO _ from do + let result ← runParser parseRequestTarget "http://123abc.example.com/page" + IO.println (repr result) From 2c23680163ffb77766572fd13b49700e15665e37 Mon Sep 17 00:00:00 2001 From: Sofia Rodrigues Date: Tue, 10 Feb 2026 17:23:14 -0300 Subject: [PATCH 05/30] fix: imports --- src/Std/Internal/Http/Data/URI/Basic.lean | 1 + src/Std/Internal/Http/Data/URI/Encoding.lean | 5 +++++ src/Std/Internal/Http/Data/URI/Parser.lean | 1 + 3 files changed, 7 insertions(+) diff --git a/src/Std/Internal/Http/Data/URI/Basic.lean b/src/Std/Internal/Http/Data/URI/Basic.lean index 4a400a55cdf8..4d91e1a411b3 100644 --- a/src/Std/Internal/Http/Data/URI/Basic.lean +++ b/src/Std/Internal/Http/Data/URI/Basic.lean @@ -6,6 +6,7 @@ Authors: Sofia Rodrigues module prelude +import Init.Data.ToString public import Std.Net public import Std.Internal.Http.Data.URI.Encoding public import Std.Internal.Http.Internal diff --git a/src/Std/Internal/Http/Data/URI/Encoding.lean b/src/Std/Internal/Http/Data/URI/Encoding.lean index 4be4660717ea..5e70a73abf37 100644 --- a/src/Std/Internal/Http/Data/URI/Encoding.lean +++ b/src/Std/Internal/Http/Data/URI/Encoding.lean @@ -7,6 +7,11 @@ module prelude import Init.Grind +import Init.While +import Init.Data.SInt.Lemmas +import Init.Data.UInt.Lemmas +import Init.Data.UInt.Bitwise +import Init.Data.Array.Lemmas public import Init.Data.String public section diff --git a/src/Std/Internal/Http/Data/URI/Parser.lean b/src/Std/Internal/Http/Data/URI/Parser.lean index 68a92b3df539..d0753b3a13b3 100644 --- a/src/Std/Internal/Http/Data/URI/Parser.lean +++ b/src/Std/Internal/Http/Data/URI/Parser.lean @@ -6,6 +6,7 @@ Authors: Sofia Rodrigues module prelude +import Init.While public import Init.Data.String public import Std.Internal.Parsec public import Std.Internal.Parsec.ByteArray From 2cc32928a48fe8919efa73e41a8caedb8bc697cc Mon Sep 17 00:00:00 2001 From: Sofia Rodrigues Date: Fri, 13 Feb 2026 01:39:12 -0300 Subject: [PATCH 06/30] feat: add parser features to path --- src/Std/Internal/Http/Data/URI/Parser.lean | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/Std/Internal/Http/Data/URI/Parser.lean b/src/Std/Internal/Http/Data/URI/Parser.lean index d0753b3a13b3..afc5543d9a3f 100644 --- a/src/Std/Internal/Http/Data/URI/Parser.lean +++ b/src/Std/Internal/Http/Data/URI/Parser.lean @@ -264,7 +264,15 @@ def parsePath (forceAbsolute : Bool) (allowEmpty : Bool) : Parser URI.Path := do while (← peek?).isSome do let segmentBytes ← parseSegment - let some segmentStr := URI.EncodedString.ofByteArray? segmentBytes.toByteArray + let segmentBytes := segmentBytes.toByteArray.foldl (init := ByteArray.empty) fun acc val => + if isSubDelims val ∨ val = ':'.toUInt8 ∨ val = '@'.toUInt8 then + acc.push '%'.toUInt8 + |>.push (hexDigit (val >>> 4)) + |>.push (hexDigit (val &&& 0xF)) + else + acc.push val + + let some segmentStr := URI.EncodedString.ofByteArray? segmentBytes | fail "invalid percent encoding in path segment" segments := segments.push segmentStr From 451c11d5a113aece8e75076af8581fbc11dbaa12 Mon Sep 17 00:00:00 2001 From: Sofia Rodrigues Date: Fri, 13 Feb 2026 01:52:04 -0300 Subject: [PATCH 07/30] fix: make strict --- src/Std/Internal/Http/Data/URI/Basic.lean | 14 +++---- src/Std/Internal/Http/Data/URI/Parser.lean | 6 +-- tests/lean/run/async_http_encode.lean | 45 ++++++---------------- 3 files changed, 20 insertions(+), 45 deletions(-) diff --git a/src/Std/Internal/Http/Data/URI/Basic.lean b/src/Std/Internal/Http/Data/URI/Basic.lean index 4d91e1a411b3..f72c0faa993a 100644 --- a/src/Std/Internal/Http/Data/URI/Basic.lean +++ b/src/Std/Internal/Http/Data/URI/Basic.lean @@ -690,7 +690,7 @@ inductive RequestTarget where Absolute-form request target containing a complete URI. Used when making requests through a proxy. Example: `http://example.com:8080/path?key=value` -/ - | absoluteForm (uri : URI) + | absoluteForm (uri : URI) (noFrag : uri.fragment.isNone) /-- Authority-form request target (used for CONNECT requests). @@ -713,7 +713,7 @@ Returns an empty relative path for targets without a path. -/ def path : RequestTarget → URI.Path | .originForm p _ => p - | .absoluteForm u => u.path + | .absoluteForm u _ => u.path | _ => { segments := #[], absolute := false } /-- @@ -722,7 +722,7 @@ Returns an empty array for targets without a query. -/ def query : RequestTarget → URI.Query | .originForm _ q => q.getD URI.Query.empty - | .absoluteForm u => u.query + | .absoluteForm u _ => u.query | _ => URI.Query.empty /-- @@ -730,7 +730,7 @@ Extracts the authority component from a request target, if available. -/ def authority? : RequestTarget → Option URI.Authority | .authorityForm a => some a - | .absoluteForm u => u.authority + | .absoluteForm u _ => u.authority | _ => none /-- @@ -738,14 +738,14 @@ Extracts the fragment component from a request target, if available. -/ def fragment? : RequestTarget → Option String | .originForm _ _ => none - | .absoluteForm u => u.fragment + | .absoluteForm u _ => u.fragment | _ => none /-- Extracts the full URI if the request target is in absolute form. -/ def uri? : RequestTarget → Option URI - | .absoluteForm u => some u + | .absoluteForm u _ => some u | _ => none instance : ToString RequestTarget where @@ -754,7 +754,7 @@ instance : ToString RequestTarget where let pathStr := toString path let queryStr := query.map toString |>.getD "" s!"{pathStr}{queryStr}" - | .absoluteForm uri => toString uri + | .absoluteForm uri _ => toString uri | .authorityForm auth => toString auth | .asteriskForm => "*" diff --git a/src/Std/Internal/Http/Data/URI/Parser.lean b/src/Std/Internal/Http/Data/URI/Parser.lean index afc5543d9a3f..f28bb76c2de5 100644 --- a/src/Std/Internal/Http/Data/URI/Parser.lean +++ b/src/Std/Internal/Http/Data/URI/Parser.lean @@ -381,12 +381,8 @@ where let (authority, path) ← parseHierPart let query ← optional (skipByteChar '?' *> parseQuery) let query := query.getD URI.Query.empty - let fragment ← optional do - let some result := (← (skipByteChar '#' *> parseFragment)) |>.decode - | fail "invalid fragment parse encoding" - return result - return .absoluteForm { path, scheme, authority, query, fragment } + return .absoluteForm { path, scheme, authority, query, fragment := none } (by simp) -- authority-form = host ":" port authority : Parser RequestTarget := attempt do diff --git a/tests/lean/run/async_http_encode.lean b/tests/lean/run/async_http_encode.lean index c61f172cb5cb..ec21c13220da 100644 --- a/tests/lean/run/async_http_encode.lean +++ b/tests/lean/run/async_http_encode.lean @@ -222,61 +222,61 @@ info: "a\x0d\n0123456789\x0d\n" info: "GET /index.html HTTP/1.1\x0d\n\x0d\n" -/ #guard_msgs in -#eval encodeStr (Request.get "/index.html" |>.body ()).head +#eval encodeStr (Request.get (.parse! "/index.html") |>.body ()).head /-- info: "POST /api/data HTTP/1.1\x0d\n\x0d\n" -/ #guard_msgs in -#eval encodeStr (Request.post "/api/data" |>.body ()).head +#eval encodeStr (Request.post (.parse! "/api/data") |>.body ()).head /-- info: "PUT /resource HTTP/1.1\x0d\n\x0d\n" -/ #guard_msgs in -#eval encodeStr (Request.put "/resource" |>.body ()).head +#eval encodeStr (Request.put (.parse! "/resource") |>.body ()).head /-- info: "DELETE /item HTTP/1.1\x0d\n\x0d\n" -/ #guard_msgs in -#eval encodeStr (Request.delete "/item" |>.body ()).head +#eval encodeStr (Request.delete (.parse! "/item") |>.body ()).head /-- info: "PATCH /update HTTP/1.1\x0d\n\x0d\n" -/ #guard_msgs in -#eval encodeStr (Request.patch "/update" |>.body ()).head +#eval encodeStr (Request.patch (.parse! "/update") |>.body ()).head /-- info: "HEAD /check HTTP/1.1\x0d\n\x0d\n" -/ #guard_msgs in -#eval encodeStr (Request.head' "/check" |>.body ()).head +#eval encodeStr (Request.head' (.parse! "/check") |>.body ()).head /-- info: "OPTIONS * HTTP/1.1\x0d\n\x0d\n" -/ #guard_msgs in -#eval encodeStr (Request.options "*" |>.body ()).head +#eval encodeStr (Request.options (.parse! "*") |>.body ()).head /-- info: "CONNECT proxy:8080 HTTP/1.1\x0d\n\x0d\n" -/ #guard_msgs in -#eval encodeStr (Request.connect "proxy:8080" |>.body ()).head +#eval encodeStr (Request.connect (.parse! "proxy:8080") |>.body ()).head /-- info: "TRACE /debug HTTP/1.1\x0d\n\x0d\n" -/ #guard_msgs in -#eval encodeStr (Request.trace "/debug" |>.body ()).head +#eval encodeStr (Request.trace (.parse! "/debug") |>.body ()).head /-- info: "POST /v2 HTTP/2.0\x0d\n\x0d\n" -/ #guard_msgs in -#eval encodeStr (Request.new |>.method .post |>.uri "/v2" |>.version .v20 |>.body ()).head +#eval encodeStr (Request.new |>.method .post |>.uri (.parse! "/v2") |>.version .v20 |>.body ()).head /-! ## Response builder -/ @@ -578,35 +578,14 @@ info: true info: "GET /search?q=hello&lang=en HTTP/1.1\x0d\n\x0d\n" -/ #guard_msgs in -#eval encodeStr ({ method := .get, version := .v11, uri := "/search?q=hello&lang=en" } : Request.Head) - --- URI with fragment -/-- -info: "GET /page#section HTTP/1.1\x0d\n\x0d\n" --/ -#guard_msgs in -#eval encodeStr ({ method := .get, version := .v11, uri := "/page#section" } : Request.Head) +#eval encodeStr ({ method := .get, version := .v11, uri := .parse! "/search?q=hello&lang=en" } : Request.Head) -- URI with percent-encoded characters /-- info: "GET /path%20with%20spaces HTTP/1.1\x0d\n\x0d\n" -/ #guard_msgs in -#eval encodeStr ({ method := .get, version := .v11, uri := "/path%20with%20spaces" } : Request.Head) - --- URI with special characters (brackets, colons) -/-- -info: "GET /api/v1/users/[id]:action HTTP/1.1\x0d\n\x0d\n" --/ -#guard_msgs in -#eval encodeStr ({ method := .get, version := .v11, uri := "/api/v1/users/[id]:action" } : Request.Head) - --- Empty URI -/-- -info: "GET HTTP/1.1\x0d\n\x0d\n" --/ -#guard_msgs in -#eval encodeStr ({ method := .get, version := .v11, uri := "" } : Request.Head) +#eval encodeStr ({ method := .get, version := .v11, uri :=.parse! "/path%20with%20spaces" } : Request.Head) /-! ## Edge cases: Response with unusual statuses -/ From cacfe00c1dd29daeec8b7e5162b75183b730d135 Mon Sep 17 00:00:00 2001 From: Sofia Rodrigues Date: Fri, 13 Feb 2026 01:54:52 -0300 Subject: [PATCH 08/30] fix: test --- tests/lean/run/async_http_uri.lean | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/lean/run/async_http_uri.lean b/tests/lean/run/async_http_uri.lean index 2f7cb6944f24..a42aba80b2ba 100644 --- a/tests/lean/run/async_http_uri.lean +++ b/tests/lean/run/async_http_uri.lean @@ -833,6 +833,7 @@ info: Std.Http.RequestTarget.absoluteForm path := { segments := #["path"], absolute := true }, query := #[], fragment := none } + _ -/ #guard_msgs in #eval show IO _ from do @@ -846,6 +847,7 @@ info: Std.Http.RequestTarget.absoluteForm path := { segments := #["page"], absolute := true }, query := #[], fragment := none } + _ -/ #guard_msgs in #eval show IO _ from do From 692c7c1a09288eb84aaa01eaa63ad67a08f5d47b Mon Sep 17 00:00:00 2001 From: Sofia Rodrigues Date: Fri, 13 Feb 2026 01:56:29 -0300 Subject: [PATCH 09/30] fix: test --- tests/lean/run/async_http_uri.lean | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/lean/run/async_http_uri.lean b/tests/lean/run/async_http_uri.lean index a42aba80b2ba..374879257c50 100644 --- a/tests/lean/run/async_http_uri.lean +++ b/tests/lean/run/async_http_uri.lean @@ -193,7 +193,6 @@ info: some " " #eval parseCheck "/path/with/encoded%20space" #eval parseCheck "/path/with/encoded%20space/" #eval parseCheck "*" -#eval parseCheck "https://ata/b?ata=be#lol%F0%9F%94%A5" #eval parseCheck "/api/search?q=hello%20world&category=tech%2Bgames" #eval parseCheck "/" #eval parseCheck "/api/v1/users/123/posts/456/comments/789" @@ -205,7 +204,6 @@ info: some " " #eval parseCheck "http://example.com/path/to/resource?query=value" #eval parseCheck "https://api.example.com:443/v1/users?limit=10" #eval parseCheck "http://[2001:db8::1]:8080/path" -#eval parseCheck "https://example.com/page#section1" #eval parseCheck "https://xn--nxasmq6b.xn--o3cw4h/path" #eval parseCheck "localhost:65535" #eval parseCheck "https://user:pass@secure.example.com/private" @@ -245,11 +243,11 @@ info: Std.Http.RequestTarget.asteriskForm IO.println (repr result) /-- -info: some "lol🔥" +info: none -/ #guard_msgs in #eval show IO _ from do - let result ← runParser parseRequestTarget "https://ata/b?ata=be#lol%F0%9F%94%A5" + let result ← runParser parseRequestTarget "https://ata/b?ata=be" IO.println (repr (result.fragment?)) /-- @@ -300,6 +298,7 @@ info: Std.Http.RequestTarget.absoluteForm path := { segments := #["ata"], absolute := true }, query := #[], fragment := none } + _ -/ #guard_msgs in #eval show IO _ from do @@ -313,6 +312,7 @@ info: Std.Http.RequestTarget.absoluteForm path := { segments := #["path"], absolute := true }, query := #[], fragment := none } + _ -/ #guard_msgs in #eval show IO _ from do @@ -328,6 +328,7 @@ info: Std.Http.RequestTarget.absoluteForm path := { segments := #["private"], absolute := true }, query := #[], fragment := none } + _ -/ #guard_msgs in #eval show IO _ from do From 10337c620b40721cd0d26f3a99fdbe0d79885b75 Mon Sep 17 00:00:00 2001 From: Sofia Rodrigues Date: Fri, 13 Feb 2026 01:57:23 -0300 Subject: [PATCH 10/30] fix: test --- tests/lean/run/async_http_uri.lean | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/tests/lean/run/async_http_uri.lean b/tests/lean/run/async_http_uri.lean index 374879257c50..d3fb610fa51b 100644 --- a/tests/lean/run/async_http_uri.lean +++ b/tests/lean/run/async_http_uri.lean @@ -698,26 +698,6 @@ info: key=new |>.set "key" "new" IO.println query.toRawString --- ============================================================================ --- Fragment Tests --- ============================================================================ - -/-- -info: some "section/subsection" --/ -#guard_msgs in -#eval show IO _ from do - let result ← runParser parseRequestTarget "https://example.com/page#section%2Fsubsection" - IO.println (repr result.fragment?) - -/-- -info: some "heading with spaces" --/ -#guard_msgs in -#eval show IO _ from do - let result ← runParser parseRequestTarget "https://example.com/page#heading%20with%20spaces" - IO.println (repr result.fragment?) - /-- info: none -/ From d07f5c502f3603e7ecc98b6e24993309b8dbf786 Mon Sep 17 00:00:00 2001 From: Sofia Rodrigues Date: Fri, 13 Feb 2026 10:10:34 -0300 Subject: [PATCH 11/30] feat: specialize encodedstrings --- src/Std/Internal/Http/Data/URI/Basic.lean | 60 +-- src/Std/Internal/Http/Data/URI/Encoding.lean | 392 +++++++++++++------ src/Std/Internal/Http/Data/URI/Parser.lean | 16 +- tests/lean/run/async_http_uri.lean | 46 +-- 4 files changed, 344 insertions(+), 170 deletions(-) diff --git a/src/Std/Internal/Http/Data/URI/Basic.lean b/src/Std/Internal/Http/Data/URI/Basic.lean index f72c0faa993a..1f7528e87fb2 100644 --- a/src/Std/Internal/Http/Data/URI/Basic.lean +++ b/src/Std/Internal/Http/Data/URI/Basic.lean @@ -145,8 +145,8 @@ instance : ToString Authority where toString auth := let userPart := match auth.userInfo with | none => "" - | some ⟨name, some pass⟩ => s!"{toString (EncodedString.encode name)}:{toString (EncodedString.encode pass)}@" - | some ⟨name, none⟩ => s!"{toString (EncodedString.encode name)}@" + | some ⟨name, some pass⟩ => s!"{toString (EncodedString.encode name (r := isUnreserved))}:{toString (EncodedString.encode pass (r := isUnreserved))}@" + | some ⟨name, none⟩ => s!"{toString (EncodedString.encode name (r := isUnreserved))}@" let hostPart := toString auth.host let portPart := match auth.port with | none => "" @@ -157,14 +157,14 @@ namespace Authority end Authority /-- -Hierarchical path component of a URI. Each segment is stored as an `EncodedString` to maintain +Hierarchical path component of a URI. Each segment is stored as an `EncodedSegment` to maintain proper percent-encoding. -/ structure Path where /-- The path segments making up the hierarchical structure (each segment is percent-encoded). -/ - segments : Array EncodedString + segments : Array (EncodedSegment) /-- Whether the path is absolute (begins with '/') or relative. @@ -203,12 +203,12 @@ def join (p1 : Path) (p2 : Path) : Path := Appends a single segment to the path. The segment will be percent-encoded. -/ def append (p : Path) (segment : String) : Path := - { p with segments := p.segments.push (EncodedString.encode segment) } + { p with segments := p.segments.push (EncodedSegment.encode segment) } /-- Appends an already-encoded segment to the path. -/ -def appendEncoded (p : Path) (segment : EncodedString) : Path := +def appendEncoded (p : Path) (segment : EncodedSegment) : Path := { p with segments := p.segments.push segment } /-- @@ -216,7 +216,7 @@ Removes dot segments from the path according to RFC 3986 Section 5.2.4. This han (current directory) and ".." (parent directory) segments. -/ def normalize (p : Path) : Path := - let rec loop (input : List EncodedString) (output : List EncodedString) : List EncodedString := + let rec loop (input : List (EncodedSegment)) (output : List (EncodedSegment)) : List (EncodedSegment) := match input with | [] => output.reverse @@ -244,11 +244,11 @@ end Path /-- Query string represented as an array of key-value pairs. Both keys and values are stored as -`EncodedQueryString` for proper application/x-www-form-urlencoded encoding. Values are optional to +`EncodedQueryParam` for proper application/x-www-form-urlencoded encoding. Values are optional to support parameters without values (e.g., "?flag"). Order is preserved based on insertion order. -/ @[expose] -def Query := Array (EncodedQueryString × Option EncodedQueryString) +def Query := Array (EncodedQueryParam × Option EncodedQueryParam) deriving Repr, Inhabited namespace Query @@ -257,7 +257,7 @@ namespace Query Extracts all unique query parameter names. -/ @[expose] -def names (query : Query) : Array EncodedQueryString := +def names (query : Query) : Array EncodedQueryParam := query.map (fun p => p.fst) |> Array.toList |> List.eraseDups @@ -267,7 +267,7 @@ def names (query : Query) : Array EncodedQueryString := Extracts all query parameter values. -/ @[expose] -def values (query : Query) : Array (Option EncodedQueryString) := +def values (query : Query) : Array (Option EncodedQueryParam) := query.map (fun p => p.snd) /-- @@ -275,33 +275,33 @@ Returns the query as an array of (key, value) pairs. This is an identity functio already an array of pairs. -/ @[expose] -def toArray (query : Query) : Array (EncodedQueryString × Option EncodedQueryString) := +def toArray (query : Query) : Array (EncodedQueryParam × Option EncodedQueryParam) := query /-- Formats a query parameter as a string in the format "key" or "key=value". The key and value are -already percent-encoded as `EncodedQueryString`. +already percent-encoded as `EncodedQueryParam`. -/ -def formatQueryParam (key : EncodedQueryString) (value : Option EncodedQueryString) : String := +def formatQueryParam (key : EncodedQueryParam) (value : Option EncodedQueryParam) : String := match value with | none => toString key | some v => s!"{toString key}={toString v}" /-- Finds the first value of a query parameter by key name. Returns `none` if the key is not found. -The value remains encoded as `EncodedQueryString`. +The value remains encoded as `EncodedQueryParam`. -/ -def find? (query : Query) (key : String) : Option (Option EncodedQueryString) := - let encodedKey := EncodedQueryString.encode key +def find? (query : Query) (key : String) : Option (Option EncodedQueryParam) := + let encodedKey : EncodedQueryParam := EncodedQueryParam.encode key let matchingKey := Array.find? (fun x => x.fst.toByteArray = encodedKey.toByteArray) query matchingKey.map (fun x => x.snd) /-- Finds all values of a query parameter by key name. Returns an empty array if the key is not found. -The values remain encoded as `EncodedQueryString`. +The values remain encoded as `EncodedQueryParam`. -/ -def findAll (query : Query) (key : String) : Array (Option EncodedQueryString) := - let encodedKey := EncodedQueryString.encode key +def findAll (query : Query) (key : String) : Array (Option EncodedQueryParam) := + let encodedKey : EncodedQueryParam := EncodedQueryParam.encode key query.filterMap (fun x => if x.fst.toByteArray = encodedKey.toByteArray then some (x.snd) @@ -311,14 +311,14 @@ def findAll (query : Query) (key : String) : Array (Option EncodedQueryString) : Adds a query parameter to the query string. -/ def insert (query : Query) (key : String) (value : String) : Query := - let encodedKey := EncodedQueryString.encode key - let encodedValue := EncodedQueryString.encode value + let encodedKey : EncodedQueryParam := EncodedQueryParam.encode key + let encodedValue : EncodedQueryParam := EncodedQueryParam.encode value query.push (encodedKey, some encodedValue) /-- Adds a query parameter to the query string. -/ -def insertEncoded (query : Query) (key : EncodedQueryString) (value : Option EncodedQueryString) : Query := +def insertEncoded (query : Query) (key : EncodedQueryParam) (value : Option EncodedQueryParam) : Query := query.push (key, value) /-- @@ -329,21 +329,21 @@ def empty : Query := #[] /-- Creates a query string from a list of key-value pairs. -/ -def ofList (pairs : List (EncodedQueryString × Option EncodedQueryString)) : Query := +def ofList (pairs : List (EncodedQueryParam × Option EncodedQueryParam)) : Query := pairs.toArray /-- Checks if a query parameter exists. -/ def contains (query : Query) (key : String) : Bool := - let encodedKey := EncodedQueryString.encode key + let encodedKey : EncodedQueryParam := EncodedQueryParam.encode key query.any (fun x => x.fst.toByteArray = encodedKey.toByteArray) /-- Removes all occurrences of a query parameter by key name. -/ def erase (query : Query) (key : String) : Query := - let encodedKey := EncodedQueryString.encode key + let encodedKey : EncodedQueryParam := EncodedQueryParam.encode key -- Filter out matching keys query.filter (fun x => x.fst.toByteArray ≠ encodedKey.toByteArray) @@ -354,7 +354,7 @@ Returns `none` if the key is not found or if the value cannot be decoded as UTF- def get (query : Query) (key : String) : Option String := match query.find? key with | none => none - | some none => some "" -- Key exists but has no value + | some none => some "" -- Key exists but has Pno value | some (some encoded) => encoded.decode /-- @@ -440,7 +440,7 @@ instance : ToString URI where | some auth => s!"//{toString auth}" let pathPart := toString uri.path let queryPart := toString uri.query - let fragmentPart := uri.fragment.map (fun f => "#" ++ toString (URI.EncodedString.encode f)) |>.getD "" + let fragmentPart := uri.fragment.map (fun f => "#" ++ toString (URI.EncodedFragment.encode f)) |>.getD "" s!"{schemePart}:{authorityPart}{pathPart}{queryPart}{fragmentPart}" namespace URI @@ -607,13 +607,13 @@ def build (b : Builder) : URI := else none let path : Path := { - segments := b.pathSegments.map EncodedString.encode + segments := b.pathSegments.map EncodedSegment.encode absolute := true } let query := b.query.map fun (k, v) => - (EncodedQueryString.encode k, v.map EncodedQueryString.encode) + (EncodedQueryParam.encode k, v.map EncodedQueryParam.encode) let query := URI.Query.ofList query.toList diff --git a/src/Std/Internal/Http/Data/URI/Encoding.lean b/src/Std/Internal/Http/Data/URI/Encoding.lean index 5e70a73abf37..bc909e2ef354 100644 --- a/src/Std/Internal/Http/Data/URI/Encoding.lean +++ b/src/Std/Internal/Http/Data/URI/Encoding.lean @@ -59,41 +59,81 @@ def isUnreserved (c : UInt8) : Bool := isAlphaNum c || (c = '-'.toUInt8 || c = '.'.toUInt8 || c = '_'.toUInt8 || c = '~'.toUInt8) +/-- +Checks if a byte is a sub-delimiter character according to RFC 3986. +Sub-delimiters are: `!`, `$`, `&`, `'`, `(`, `)`, `*`, `+`, `,`, `;`, `=`. +-/ +def isSubDelims (c : UInt8) : Bool := + c = '!'.toUInt8 || c = '$'.toUInt8 || c = '&'.toUInt8 || c = '\''.toUInt8 || + c = '('.toUInt8 || c = ')'.toUInt8 || c = '*'.toUInt8 || c = '+'.toUInt8 || + c = ','.toUInt8 || c = ';'.toUInt8 || c = '='.toUInt8 + +/-- +Checks if a byte is a valid path character (`pchar`) according to RFC 3986. +`pchar = unreserved / pct-encoded / sub-delims / ":" / "@"` + +Note: The percent-encoding (`pct-encoded`) is handled separately by `isEncodedChar`, +so this predicate only covers the non-percent characters. +-/ +def isPChar (c : UInt8) : Bool := + isUnreserved c || isSubDelims c || c = ':'.toUInt8 || c = '@'.toUInt8 + +/-- +Checks if a byte is a valid character in a URI query component according to RFC 3986. +`query = *( pchar / "/" / "?" )` +-/ +def isQueryChar (c : UInt8) : Bool := + isPChar c || c = '/'.toUInt8 || c = '?'.toUInt8 + +/-- +Checks if a byte is a valid character in a URI fragment component according to RFC 3986. +`fragment = *( pchar / "/" / "?" )` +-/ +def isFragmentChar (c : UInt8) : Bool := + isPChar c || c = '/'.toUInt8 || c = '?'.toUInt8 + +/-- +Checks if a byte is a valid character in a URI userinfo component according to RFC 3986. +`userinfo = *( unreserved / pct-encoded / sub-delims / ":" )` +-/ +def isUserInfoChar (c : UInt8) : Bool := + isUnreserved c || isSubDelims c || c = ':'.toUInt8 + /-- Checks if a byte is a valid character in a percent-encoded URI component. Valid characters are unreserved characters or the percent sign (for escape sequences). -/ -def isEncodedChar (c : UInt8) : Bool := - isUnreserved c || c = '%'.toUInt8 +def isEncodedChar (rule : UInt8 → Bool) (c : UInt8) : Bool := + isAscii c ∧ (rule c ∨ isHexDigit c ∨ c = '%'.toUInt8) /-- Checks if a byte is valid in a percent-encoded query string component. Extends `isEncodedChar` to also allow '+' which represents space in application/x-www-form-urlencoded format. -/ -def isEncodedQueryChar (c : UInt8) : Bool := - isEncodedChar c || c = '+'.toUInt8 +def isEncodedQueryChar (rule : UInt8 → Bool) (c : UInt8) : Bool := + isEncodedChar rule c ∨ c = '+'.toUInt8 /-- Checks if all characters in a `ByteArray` are allowed in an encoded URI component. This is a fast check that only verifies the character set, not full encoding validity. -/ @[inline] -abbrev isAllowedEncodedChars (s : ByteArray) : Prop := - s.data.all isEncodedChar +abbrev isAllowedEncodedChars (rule : UInt8 → Bool) (s : ByteArray) : Prop := + s.data.all (isEncodedChar rule) -instance : Decidable (isAllowedEncodedChars s) := - inferInstanceAs (Decidable (s.data.all isEncodedChar = true)) +instance : Decidable (isAllowedEncodedChars r s) := + inferInstanceAs (Decidable (s.data.all (isEncodedChar r) = true)) /-- Checks if all characters in a `ByteArray` are allowed in an encoded query parameter. Allows '+' as an alternative encoding for space (application/x-www-form-urlencoded). -/ @[inline] -abbrev isAllowedEncodedQueryChars (s : ByteArray) : Prop := - s.data.all isEncodedQueryChar +abbrev isAllowedEncodedQueryChars (rule : UInt8 → Bool) (s : ByteArray) : Prop := + s.data.all (isEncodedQueryChar rule) -instance : Decidable (isAllowedEncodedQueryChars s) := - inferInstanceAs (Decidable (s.data.all isEncodedQueryChar = true)) +instance : Decidable (isAllowedEncodedQueryChars r s) := + inferInstanceAs (Decidable (s.data.all (isEncodedQueryChar r) = true)) /-- Validates that all percent signs in a byte array are followed by exactly two hexadecimal digits. @@ -144,36 +184,26 @@ def hexDigitToUInt8? (c : UInt8) : Option UInt8 := else none -theorem isAllowedEncodedChars.push {bs : ByteArray} (h : isAllowedEncodedChars bs) (h₁ : isEncodedChar c) : - isAllowedEncodedChars (bs.push c) := by +private theorem isAllowedEncodedChars.push {bs : ByteArray} (h : isAllowedEncodedChars r bs) (h₁ : isEncodedChar r c) : + isAllowedEncodedChars r (bs.push c) := by simpa [isAllowedEncodedChars, ByteArray.push, Array.all_push, And.intro h h₁] -theorem isAllowedEncodedQueryChars.push {bs : ByteArray} (h : isAllowedEncodedQueryChars bs) (h₁ : isEncodedQueryChar c) : - isAllowedEncodedQueryChars (bs.push c) := by +private theorem isAllowedEncodedQueryChars.push {bs : ByteArray} (h : isAllowedEncodedQueryChars r bs) (h₁ : isEncodedQueryChar r c) : + isAllowedEncodedQueryChars r (bs.push c) := by simpa [isAllowedEncodedQueryChars, ByteArray.push, Array.all_push, And.intro h h₁] -theorem isAlphaNum_isAscii {c : UInt8} (h : isAlphaNum c) : isAscii c := by - unfold isAlphaNum isAscii at * - simp at h - rcases h with ⟨h1, h2⟩ - next => simp; exact Nat.lt_of_le_of_lt h2 (by decide) - next h => simp; exact Nat.lt_of_le_of_lt h.2 (by decide) - next h => simp; exact Nat.lt_of_le_of_lt h.2 (by decide) - -theorem isEncodedChar_isAscii (c : UInt8) (h : isEncodedChar c) : isAscii c := by - unfold isEncodedChar isUnreserved at * - cases h' : isAlphaNum c - · simp [h'] at *; rcases h with ⟨h, h⟩ | h | h | h <;> (subst_vars; decide) - · simp [h'] at h; exact (isAlphaNum_isAscii h') - -theorem isEncodedQueryChar_isAscii (c : UInt8) (h : isEncodedQueryChar c) : isAscii c := by +private theorem isEncodedChar_isAscii (c : UInt8) (h : isEncodedChar r c) : isAscii c := by + simp [isEncodedChar, isAscii] at * + exact h.left + +private theorem isEncodedQueryChar_isAscii (c : UInt8) (h : isEncodedQueryChar r c) : isAscii c := by unfold isEncodedQueryChar isAscii at * simp at h rcases h next h => exact isEncodedChar_isAscii c h next h => subst_vars; decide -theorem hexDigit_isHexDigit (h₀ : x < 16) : isHexDigit (hexDigit x) := by +private theorem hexDigit_isHexDigit (h₀ : x < 16) : isHexDigit (hexDigit x) := by unfold hexDigit isHexDigit have h₁ : x.toNat < 16 := h₀ split <;> simp [Char.toUInt8] @@ -200,29 +230,22 @@ theorem hexDigit_isHexDigit (h₀ : x < 16) : isHexDigit (hexDigit x) := by · simpa [UInt8.ofNat_add, UInt8.ofNat_sub (by omega : 10 ≤ x.toNat)] using UInt8.ofNat_le_iff_le h₄ (by decide : 70 < 256) |>.mpr h₅ -theorem isHexDigit_isAlphaNum {c : UInt8} (h : isHexDigit c) : isAlphaNum c := by - unfold isHexDigit isAlphaNum at * - simp at h ⊢ - rcases h with ⟨h1, h2⟩ - next => exact Or.inl (Or.inl ⟨h1, h2⟩) - next h => exact Or.inl (Or.inr ⟨h.1, Nat.le_trans h.2 (by decide)⟩) - next h => exact Or.inr ⟨h.1, Nat.le_trans h.2 (by decide)⟩ +private theorem isHexDigit_isAscii {c : UInt8} (h : isHexDigit c) : isAscii c := by + simp [isHexDigit, isAscii, Char.toUInt8] at * + rcases h with ⟨h1, h2⟩ | ⟨h1, h2⟩ + · exact UInt8.lt_of_le_of_lt h2 (by decide) + next h => exact UInt8.lt_of_le_of_lt h.right (by decide) + · exact UInt8.lt_of_le_of_lt h2 (by decide) -theorem isAlphaNum_isEncodedChar {c : UInt8} (h : isAlphaNum c) : isEncodedChar c := by - unfold isEncodedChar isUnreserved +private theorem isHexDigit_isEncodedChar {c : UInt8} (h : isHexDigit c) : isEncodedChar r c := by + unfold isEncodedChar simp at * - exact Or.inl (Or.inl h) + exact And.intro (isHexDigit_isAscii h) (Or.inr (Or.inl h)) -theorem isAlphaNum_isEncodedQueryChar {c : UInt8} (h : isAlphaNum c) : isEncodedQueryChar c := by - unfold isEncodedQueryChar isEncodedChar isUnreserved +private theorem isHexDigit_isEncodedQueryChar {c : UInt8} (h : isHexDigit c) : isEncodedQueryChar r c := by + unfold isEncodedQueryChar isEncodedChar simp at * - exact Or.inl (Or.inl (Or.inl h)) - -theorem isHexDigit_isEncodedChar {c : UInt8} (h : isHexDigit c) : isEncodedChar c := - isAlphaNum_isEncodedChar (isHexDigit_isAlphaNum h) - -theorem isHexDigit_isEncodedQueryChar {c : UInt8} (h : isHexDigit c) : isEncodedQueryChar c := - isAlphaNum_isEncodedQueryChar (isHexDigit_isAlphaNum h) + exact Or.inl (And.intro (isHexDigit_isAscii h) (Or.inr (Or.inl h))) theorem all_of_all_of_imp {b : ByteArray} (h : b.data.all p) (imp : ∀ c, p c → q c) : b.data.all q := by rw [Array.all_eq] at * @@ -230,7 +253,7 @@ theorem all_of_all_of_imp {b : ByteArray} (h : b.data.all p) (imp : ∀ c, p c intro i x exact (imp b.data[i]) (h i x) -theorem autf8EncodeChar_flatMap_ascii {a : List UInt8} +private theorem autf8EncodeChar_flatMap_ascii {a : List UInt8} (is_ascii_list : ∀ (x : UInt8), x ∈ a → x < 128) : List.flatMap (fun a => String.utf8EncodeChar (Char.ofUInt8 a)) a = a := by have h_encode {i : UInt8} (h : i < 128) : String.utf8EncodeChar (Char.ofUInt8 i) = [i] := by @@ -246,13 +269,13 @@ theorem autf8EncodeChar_flatMap_ascii {a : List UInt8} exact is_ascii_list x (by simp [hx]) · exact is_ascii_list head (by simp) -theorem List.toByteArray_loop_eq (xs : List UInt8) (acc : ByteArray) : +private theorem List.toByteArray_loop_eq (xs : List UInt8) (acc : ByteArray) : (List.toByteArray.loop xs acc).data = acc.data ++ xs.toArray := by induction xs generalizing acc with | nil => simp [List.toByteArray.loop] | cons x xs ih => simp [List.toByteArray.loop, ih, Array.push] -theorem ByteArray.toList_toByteArray (ba : ByteArray) : +private theorem ByteArray.toList_toByteArray (ba : ByteArray) : ba.data.toList.toByteArray = ba := by cases ba with | mk data => @@ -280,7 +303,7 @@ This provides type-safe URI encoding without runtime validation. The invariant guarantees that the string contains only unreserved characters (alphanumeric, hyphen, period, underscore, tilde) and percent signs (for escape sequences). -/ -structure EncodedString where +structure EncodedString (r : UInt8 → Bool) where private mk :: /-- @@ -291,40 +314,46 @@ structure EncodedString where /-- Proof that all characters in the byte array are valid encoded characters. -/ - valid : isAllowedEncodedChars toByteArray + valid : isAllowedEncodedChars r toByteArray namespace EncodedString /-- Creates an empty encoded string. -/ -def empty : EncodedString := - ⟨.empty, by native_decide⟩ +def empty : EncodedString r := + ⟨.empty, by simp []; exact fun i h => by contradiction⟩ -instance : Inhabited EncodedString where +instance : Inhabited (EncodedString r) where default := EncodedString.empty /-- Appends a single encoded character to an encoded string. Requires that the character is not '%' to maintain the percent-encoding invariant. -/ -private def push (s : EncodedString) (c : UInt8) (h : isEncodedChar c) : EncodedString := +private def push (s : EncodedString r) (c : UInt8) (h : isEncodedChar r c) : EncodedString r := ⟨s.toByteArray.push c, isAllowedEncodedChars.push s.valid h⟩ /-- Converts a byte to its percent-encoded hexadecimal representation (%XX). For example, a space character (0x20) becomes "%20". -/ -private def byteToHex (b : UInt8) (s : EncodedString) : EncodedString := +private def byteToHex (b : UInt8) (s : EncodedString r) : EncodedString r := let ba := s.toByteArray.push '%'.toUInt8 |>.push (hexDigit (b >>> 4)) |>.push (hexDigit (b &&& 0xF)) let valid := by - have h1 : isEncodedChar '%'.toUInt8 := by decide - have h2 : isEncodedChar (hexDigit (b >>> 4)) := - isHexDigit_isEncodedChar (hexDigit_isHexDigit (BitVec.toNat_ushiftRight_lt b.toBitVec 4 (by decide))) - have h3 : isEncodedChar (hexDigit (b &&& 0xF)) := - isHexDigit_isEncodedChar (hexDigit_isHexDigit (@UInt8.and_lt_add_one b 0xF (by decide))) + have h1 : isEncodedChar r '%'.toUInt8 := + by simp [isEncodedChar]; decide + + have h2 : isEncodedChar r (hexDigit (b >>> 4)) := + let h₀ := hexDigit_isHexDigit (BitVec.toNat_ushiftRight_lt b.toBitVec 4 (by decide)) + isHexDigit_isEncodedChar h₀ + + have h3 : isEncodedChar r (hexDigit (b &&& 0xF)) := + let h₀ := hexDigit_isHexDigit (@UInt8.and_lt_add_one b 0xF (by decide)) + isHexDigit_isEncodedChar h₀ + exact isAllowedEncodedChars.push (isAllowedEncodedChars.push (isAllowedEncodedChars.push s.valid h1) h2) h3 ⟨ba, valid⟩ @@ -332,10 +361,10 @@ private def byteToHex (b : UInt8) (s : EncodedString) : EncodedString := Encodes a raw string into an `EncodedString` with automatic proof construction. Unreserved characters (alphanumeric, hyphen, period, underscore, tilde) are kept as-is, while all other characters are percent-encoded. -/ -def encode (s : String) : EncodedString := +def encode (s : String) : EncodedString r := s.toUTF8.foldl (init := EncodedString.empty) fun acc c => - if h : isUnreserved c then - acc.push c (by simp [isEncodedChar]; exact Or.inl h) + if h : isAscii c ∧ r c then + acc.push c (by simp [isEncodedChar]; exact And.intro h.left (Or.inl h.right)) else byteToHex c acc @@ -343,15 +372,15 @@ def encode (s : String) : EncodedString := Attempts to create an `EncodedString` from a `ByteArray`. Returns `some` if the byte array contains only valid encoded characters and all percent signs are followed by exactly two hex digits, `none` otherwise. -/ -def ofByteArray? (ba : ByteArray) : Option EncodedString := - if h : isAllowedEncodedChars ba then +def ofByteArray? (ba : ByteArray) : Option (EncodedString r) := + if h : isAllowedEncodedChars r ba then if isValidPercentEncoding ba then some ⟨ba, h⟩ else none else none /-- Creates an `EncodedString` from a `ByteArray`, panicking if the byte array is invalid. -/ -def ofByteArray! (ba : ByteArray) : EncodedString := +def ofByteArray! (ba : ByteArray) : EncodedString r := match ofByteArray? ba with | some es => es | none => panic! "invalid encoded string" @@ -360,30 +389,30 @@ def ofByteArray! (ba : ByteArray) : EncodedString := Creates an `EncodedString` from a `String` by checking if it's already a valid percent-encoded string. Returns `some` if valid, `none` otherwise. -/ -def ofString? (s : String) : Option EncodedString := +def ofString? (s : String) : Option (EncodedString r) := ofByteArray? s.toUTF8 /-- Creates an `EncodedString` from a `String`, panicking if the string is not a valid percent-encoded string. -/ -def ofString! (s : String) : EncodedString := +def ofString! (s : String) : EncodedString r := ofByteArray! s.toUTF8 /-- Creates an `EncodedString` from a `ByteArray` with compile-time proofs. Use this when you have proofs that the byte array is valid. -/ -def new (ba : ByteArray) (valid : isAllowedEncodedChars ba) (_validEncoding : isValidPercentEncoding ba) : EncodedString := +def new (ba : ByteArray) (valid : isAllowedEncodedChars r ba) (_validEncoding : isValidPercentEncoding ba) : EncodedString r := ⟨ba, valid⟩ -instance : ToString EncodedString where - toString es := ⟨es.toByteArray, ascii_is_valid_utf8 es.toByteArray (all_of_all_of_imp es.valid isEncodedChar_isAscii)⟩ +instance : ToString (EncodedString r) where + toString es := ⟨es.toByteArray, ascii_is_valid_utf8 es.toByteArray (all_of_all_of_imp es.valid (fun c h => by simp [isEncodedChar] at h; exact h.left))⟩ /-- Decodes an `EncodedString` back to a regular `String`. Converts percent-encoded sequences (e.g., "%20") back to their original characters. Returns `none` if the decoded bytes are not valid UTF-8. -/ -def decode (es : EncodedString) : Option String := Id.run do +def decode (es : EncodedString r) : Option String := Id.run do let mut decoded : ByteArray := ByteArray.empty let rawBytes := es.toByteArray let len := rawBytes.size @@ -408,13 +437,13 @@ def decode (es : EncodedString) : Option String := Id.run do (decoded.push c, i + 1) return String.fromUTF8? decoded -instance : Repr EncodedString where - reprPrec es := reprPrec (toString es) +instance : Repr (EncodedString r) where + reprPrec es n := reprPrec (toString es) n -instance : BEq EncodedString where +instance : BEq (EncodedString r) where beq x y := x.toByteArray = y.toByteArray -instance : Hashable EncodedString where +instance : Hashable (EncodedString r) where hash x := Hashable.hash x.toByteArray end EncodedString @@ -427,7 +456,7 @@ application/x-www-form-urlencoded format. This type is specifically designed for encoding query parameters where spaces can be represented as '+' instead of "%20". -/ -structure EncodedQueryString where +structure EncodedQueryString (r : UInt8 → Bool) where private mk :: /-- @@ -438,38 +467,38 @@ structure EncodedQueryString where /-- Proof that all characters in the byte array are valid encoded query characters. -/ - valid : isAllowedEncodedQueryChars toByteArray + valid : isAllowedEncodedQueryChars r toByteArray namespace EncodedQueryString /-- Creates an empty encoded query string. -/ -def empty : EncodedQueryString := - ⟨.empty, by native_decide⟩ +def empty : EncodedQueryString r := + ⟨.empty, by simp; intro a h; contradiction⟩ -instance : Inhabited EncodedQueryString where +instance : Inhabited (EncodedQueryString r) where default := EncodedQueryString.empty /-- Appends a single encoded query character to an encoded query string. -/ -private def push (s : EncodedQueryString) (c : UInt8) (h : isEncodedQueryChar c) : EncodedQueryString := +private def push (s : EncodedQueryString r) (c : UInt8) (h : isEncodedQueryChar r c) : EncodedQueryString r := ⟨s.toByteArray.push c, isAllowedEncodedQueryChars.push s.valid h⟩ /-- Attempts to create an `EncodedQueryString` from a `ByteArray`. Returns `some` if the byte array contains only valid encoded query characters and all percent signs are followed by exactly two hex digits, `none` otherwise. -/ -def ofByteArray? (ba : ByteArray) : Option EncodedQueryString := - if h : isAllowedEncodedQueryChars ba then +def ofByteArray? (ba : ByteArray) : Option (EncodedQueryString r) := + if h : isAllowedEncodedQueryChars r ba then if isValidPercentEncoding ba then some ⟨ba, h⟩ else none else none /-- Creates an `EncodedQueryString` from a `ByteArray`, panicking if the byte array is invalid. -/ -def ofByteArray! (ba : ByteArray) : EncodedQueryString := +def ofByteArray! (ba : ByteArray) : EncodedQueryString r := match ofByteArray? ba with | some es => es | none => panic! "invalid encoded query string" @@ -478,35 +507,36 @@ def ofByteArray! (ba : ByteArray) : EncodedQueryString := Creates an `EncodedQueryString` from a `String` by checking if it's already a valid percent-encoded string. Returns `some` if valid, `none` otherwise. -/ -def ofString? (s : String) : Option EncodedQueryString := +def ofString? (s : String) : Option (EncodedQueryString r) := ofByteArray? s.toUTF8 /-- Creates an `EncodedQueryString` from a `String`, panicking if the string is not a valid percent-encoded string. -/ -def ofString! (s : String) : EncodedQueryString := +def ofString! (s : String) : EncodedQueryString r := ofByteArray! s.toUTF8 /-- Creates an `EncodedQueryString` from a `ByteArray` with compile-time proofs. Use this when you have proofs that the byte array is valid. -/ -def new (ba : ByteArray) (valid : isAllowedEncodedQueryChars ba) (_validEncoding : isValidPercentEncoding ba) : EncodedQueryString := +def new (ba : ByteArray) (valid : isAllowedEncodedQueryChars r ba) (_validEncoding : isValidPercentEncoding ba) : EncodedQueryString r := ⟨ba, valid⟩ /-- Converts a byte to its percent-encoded hexadecimal representation (%XX). For example, a space character (0x20) becomes "%20". -/ -private def byteToHex (b : UInt8) (s : EncodedQueryString) : EncodedQueryString := +private def byteToHex (b : UInt8) (s : EncodedQueryString r) : EncodedQueryString r := let ba := s.toByteArray.push '%'.toUInt8 |>.push (hexDigit (b >>> 4)) |>.push (hexDigit (b &&& 0xF)) let valid := by - have h1 : isEncodedQueryChar '%'.toUInt8 := by decide - have h2 : isEncodedQueryChar (hexDigit (b >>> 4)) := + have h1 : isEncodedQueryChar r '%'.toUInt8 := by + simp [isEncodedQueryChar, isEncodedChar]; decide + have h2 : isEncodedQueryChar r (hexDigit (b >>> 4)) := isHexDigit_isEncodedQueryChar (hexDigit_isHexDigit (BitVec.toNat_ushiftRight_lt b.toBitVec 4 (by decide))) - have h3 : isEncodedQueryChar (hexDigit (b &&& 0xF)) := + have h3 : isEncodedQueryChar r (hexDigit (b &&& 0xF)) := isHexDigit_isEncodedQueryChar (hexDigit_isHexDigit (@UInt8.and_lt_add_one b 0xF (by decide))) exact isAllowedEncodedQueryChars.push (isAllowedEncodedQueryChars.push (isAllowedEncodedQueryChars.push s.valid h1) h2) h3 ⟨ba, valid⟩ @@ -515,17 +545,20 @@ private def byteToHex (b : UInt8) (s : EncodedQueryString) : EncodedQueryString Encodes a raw string into an `EncodedQueryString` with automatic proof construction. Unreserved characters are kept as-is, spaces are encoded as '+', and all other characters are percent-encoded. -/ -def encode (s : String) : EncodedQueryString := +def encode {r} (s : String) : EncodedQueryString r := s.toUTF8.foldl (init := EncodedQueryString.empty) fun acc c => - if h : isUnreserved c then - acc.push c (by simp [isEncodedQueryChar, isEncodedChar]; exact Or.inl (Or.inl h)) + if h : isAscii c ∧ r c then + acc.push c (by simp [isEncodedQueryChar, isEncodedChar]; exact Or.inl (And.intro h.left (Or.inl h.right))) else if _ : c = ' '.toUInt8 then acc.push '+'.toUInt8 (by simp [isEncodedQueryChar]) else byteToHex c acc -instance : ToString EncodedQueryString where - toString es := ⟨es.toByteArray, ascii_is_valid_utf8 es.toByteArray (all_of_all_of_imp es.valid isEncodedQueryChar_isAscii)⟩ +/-- +Converts an `EncodedQueryString` to a `String`, given a proof that all characters satisfying `r` are ASCII. +-/ +def toString (es : EncodedQueryString r) : String := + ⟨es.toByteArray, ascii_is_valid_utf8 es.toByteArray (all_of_all_of_imp es.valid (fun c h => isEncodedQueryChar_isAscii c h))⟩ /-- Decodes an `EncodedQueryString` back to a regular `String`. Converts percent-encoded sequences and '+' @@ -533,7 +566,7 @@ signs back to their original characters. Returns `none` if the decoded bytes are This is almost the same code from `System.Uri.UriEscape.decodeUri`, but with `Option` instead. -/ -def decode (es : EncodedQueryString) : Option String := Id.run do +def decode (es : EncodedQueryString r) : Option String := Id.run do let mut decoded : ByteArray := ByteArray.empty let rawBytes := es.toByteArray let len := rawBytes.size @@ -563,18 +596,159 @@ def decode (es : EncodedQueryString) : Option String := Id.run do end EncodedQueryString -instance : Repr EncodedQueryString where - reprPrec es := reprPrec (toString es) +instance : ToString (EncodedQueryString r) where + toString := EncodedQueryString.toString -instance : BEq EncodedQueryString where +instance : Repr (EncodedQueryString r) where + reprPrec es n := reprPrec (toString es) n + +instance : BEq (EncodedQueryString r) where beq x y := x.toByteArray = y.toByteArray -instance : Hashable EncodedQueryString where +instance : Hashable (EncodedQueryString r) where hash x := Hashable.hash x.toByteArray -instance : Hashable (Option EncodedQueryString) where +instance : Hashable (Option (EncodedQueryString r)) where hash | some x => Hashable.hash ((ByteArray.mk #[1] ++ x.toByteArray)) | none => Hashable.hash (ByteArray.mk #[0]) +/-- +A percent-encoded URI path segment. Valid characters are `pchar` (unreserved, sub-delims, ':', '@'). +-/ +abbrev EncodedSegment := EncodedString isPChar + +namespace EncodedSegment + +/-- +Encodes a raw string into an encoded path segment. +-/ +def encode (s : String) : EncodedSegment := + EncodedString.encode (r := isPChar) s + +/-- +Attempts to create an encoded path segment from raw bytes. +-/ +def ofByteArray? (ba : ByteArray) : Option EncodedSegment := + EncodedString.ofByteArray? (r := isPChar) ba + +/-- +Creates an encoded path segment from raw bytes, panicking on invalid encoding. +-/ +def ofByteArray! (ba : ByteArray) : EncodedSegment := + EncodedString.ofByteArray! (r := isPChar) ba + +/-- +Decodes an encoded path segment back to a UTF-8 string. +-/ +def decode (segment : EncodedSegment) : Option String := + EncodedString.decode segment + +end EncodedSegment + +/-- +A percent-encoded URI fragment component. Valid characters are `pchar / "/" / "?"`. +-/ +abbrev EncodedFragment := EncodedString isFragmentChar + +namespace EncodedFragment + +/-- +Encodes a raw string into an encoded fragment component. +-/ +def encode (s : String) : EncodedFragment := + EncodedString.encode (r := isFragmentChar) s + +/-- +Attempts to create an encoded fragment component from raw bytes. +-/ +def ofByteArray? (ba : ByteArray) : Option EncodedFragment := + EncodedString.ofByteArray? (r := isFragmentChar) ba + +/-- +Creates an encoded fragment component from raw bytes, panicking on invalid encoding. +-/ +def ofByteArray! (ba : ByteArray) : EncodedFragment := + EncodedString.ofByteArray! (r := isFragmentChar) ba + +/-- +Decodes an encoded fragment component back to a UTF-8 string. +-/ +def decode (fragment : EncodedFragment) : Option String := + EncodedString.decode fragment + +end EncodedFragment + +/-- +A percent-encoded URI userinfo component. Valid characters are `unreserved / sub-delims / ":"`. +-/ +abbrev EncodedUserInfo := EncodedString isUserInfoChar + +namespace EncodedUserInfo + +/-- +Encodes a raw string into an encoded userinfo component. +-/ +def encode (s : String) : EncodedUserInfo := + EncodedString.encode (r := isUserInfoChar) s + +/-- +Attempts to create an encoded userinfo component from raw bytes. +-/ +def ofByteArray? (ba : ByteArray) : Option EncodedUserInfo := + EncodedString.ofByteArray? (r := isUserInfoChar) ba + +/-- +Creates an encoded userinfo component from raw bytes, panicking on invalid encoding. +-/ +def ofByteArray! (ba : ByteArray) : EncodedUserInfo := + EncodedString.ofByteArray! (r := isUserInfoChar) ba + +/-- +Decodes an encoded userinfo component back to a UTF-8 string. +-/ +def decode (userInfo : EncodedUserInfo) : Option String := + EncodedString.decode userInfo + +end EncodedUserInfo + +/-- +A percent-encoded URI query parameter. Valid characters are `pchar / "/" / "?"` with '+' for spaces. +-/ +abbrev EncodedQueryParam := EncodedQueryString isQueryChar + +namespace EncodedQueryParam + +/-- +Encodes a raw string into an encoded query parameter. +-/ +def encode (s : String) : EncodedQueryParam := + EncodedQueryString.encode (r := isQueryChar) s + +/-- +Attempts to create an encoded query parameter from raw bytes. +-/ +def ofByteArray? (ba : ByteArray) : Option EncodedQueryParam := + EncodedQueryString.ofByteArray? (r := isQueryChar) ba + +/-- +Creates an encoded query parameter from raw bytes, panicking on invalid encoding. +-/ +def ofByteArray! (ba : ByteArray) : EncodedQueryParam := + EncodedQueryString.ofByteArray! (r := isQueryChar) ba + +/-- +Attempts to create an encoded query parameter from an encoded string. +-/ +def fromString? (s : String) : Option EncodedQueryParam := + EncodedQueryString.ofString? (r := isQueryChar) s + +/-- +Decodes an encoded query parameter back to a UTF-8 string. +-/ +def decode (param : EncodedQueryParam) : Option String := + EncodedQueryString.decode param + +end EncodedQueryParam + end Std.Http.URI diff --git a/src/Std/Internal/Http/Data/URI/Parser.lean b/src/Std/Internal/Http/Data/URI/Parser.lean index f28bb76c2de5..5849369d74bf 100644 --- a/src/Std/Internal/Http/Data/URI/Parser.lean +++ b/src/Std/Internal/Http/Data/URI/Parser.lean @@ -134,7 +134,7 @@ private def parsePortNumber : Parser UInt16 := do private def parseUserInfo : Parser URI.UserInfo := do let userBytesName ← takeWhileUpTo (fun x => x ≠ ':'.toUInt8 ∧ isUserInfoChar x) 1024 - let some userName := URI.EncodedString.ofByteArray? userBytesName.toByteArray + let some userName := URI.EncodedUserInfo.ofByteArray? userBytesName.toByteArray | fail "invalid percent encoding in user info" let userPass ← if ← peekIs (· == ':'.toUInt8) then @@ -142,7 +142,7 @@ private def parseUserInfo : Parser URI.UserInfo := do let userBytesPass ← takeWhileUpTo isUserInfoChar 1024 - let some userStrPass := URI.EncodedString.ofByteArray? userBytesPass.toByteArray >>= URI.EncodedString.decode + let some userStrPass := URI.EncodedUserInfo.ofByteArray? userBytesPass.toByteArray >>= URI.EncodedUserInfo.decode | fail "invalid percent encoding in user info" pure <| some userStrPass @@ -272,7 +272,7 @@ def parsePath (forceAbsolute : Bool) (allowEmpty : Bool) : Parser URI.Path := do else acc.push val - let some segmentStr := URI.EncodedString.ofByteArray? segmentBytes + let some segmentStr := URI.EncodedSegment.fromByteArray? segmentBytes | fail "invalid percent encoding in path segment" segments := segments.push segmentStr @@ -297,11 +297,11 @@ private def parseQuery : Parser URI.Query := do let pairs : Option URI.Query := queryStr.splitOn "&" |>.foldlM (init := URI.Query.empty) fun acc pair => do match pair.splitOn "=" with | [key] => - let key ← URI.EncodedQueryString.ofString? key + let key ← URI.EncodedQueryParam.fromString? key pure (acc.insertEncoded key none) | key :: value => - let key ← URI.EncodedQueryString.ofString? key - let value ← URI.EncodedQueryString.ofString? (String.intercalate "=" value) + let key ← URI.EncodedQueryParam.fromString? key + let value ← URI.EncodedQueryParam.fromString? (String.intercalate "=" value) pure (acc.insertEncoded key (some value)) | [] => pure acc @@ -311,10 +311,10 @@ private def parseQuery : Parser URI.Query := do fail "invalid query string" -- fragment = *( pchar / "/" / "?" ) -private def parseFragment : Parser URI.EncodedString := do +private def parseFragment : Parser URI.EncodedFragment := do let fragmentBytes ← takeWhileUpTo isFragmentChar 1024 - let some fragmentStr := URI.EncodedString.ofByteArray? fragmentBytes.toByteArray + let some fragmentStr := URI.EncodedFragment.ofByteArray? fragmentBytes.toByteArray | fail "invalid percent encoding in fragment" return fragmentStr diff --git a/tests/lean/run/async_http_uri.lean b/tests/lean/run/async_http_uri.lean index d3fb610fa51b..7e115e03e73f 100644 --- a/tests/lean/run/async_http_uri.lean +++ b/tests/lean/run/async_http_uri.lean @@ -48,75 +48,75 @@ def parseCheckFail (s : String) : IO Unit := do info: some "abc" -/ #guard_msgs in -#eval IO.println (repr (EncodedString.ofByteArray? "abc".toUTF8)) +#eval IO.println (repr (EncodedSegment.ofByteArray? "abc".toUTF8)) /-- info: some "%20" -/ #guard_msgs in -#eval IO.println (repr (EncodedString.ofByteArray? "%20".toUTF8)) +#eval IO.println (repr (EncodedSegment.ofByteArray? "%20".toUTF8)) /-- info: some "hello%20world" -/ #guard_msgs in -#eval IO.println (repr (EncodedString.ofByteArray? "hello%20world".toUTF8)) +#eval IO.println (repr (EncodedSegment.ofByteArray? "hello%20world".toUTF8)) /-- info: some "%FF" -/ #guard_msgs in -#eval IO.println (repr (EncodedString.ofByteArray? "%FF".toUTF8)) +#eval IO.println (repr (EncodedSegment.ofByteArray? "%FF".toUTF8)) /-- info: some "%00" -/ #guard_msgs in -#eval IO.println (repr (EncodedString.ofByteArray? "%00".toUTF8)) +#eval IO.println (repr (EncodedSegment.ofByteArray? "%00".toUTF8)) -- Invalid percent encoding: incomplete /-- info: none -/ #guard_msgs in -#eval IO.println (repr (EncodedString.ofByteArray? "%".toUTF8)) +#eval IO.println (repr (EncodedSegment.ofByteArray? "%".toUTF8)) /-- info: none -/ #guard_msgs in -#eval IO.println (repr (EncodedString.ofByteArray? "hello%".toUTF8)) +#eval IO.println (repr (EncodedSegment.ofByteArray? "hello%".toUTF8)) /-- info: none -/ #guard_msgs in -#eval IO.println (repr (EncodedString.ofByteArray? "%2".toUTF8)) +#eval IO.println (repr (EncodedSegment.ofByteArray? "%2".toUTF8)) /-- info: none -/ #guard_msgs in -#eval IO.println (repr (EncodedString.ofByteArray? "%A".toUTF8)) +#eval IO.println (repr (EncodedSegment.ofByteArray? "%A".toUTF8)) -- Invalid percent encoding: non-hex characters /-- info: none -/ #guard_msgs in -#eval IO.println (repr (EncodedString.ofByteArray? "%GG".toUTF8)) +#eval IO.println (repr (EncodedSegment.ofByteArray? "%GG".toUTF8)) /-- info: none -/ #guard_msgs in -#eval IO.println (repr (EncodedString.ofByteArray? "%2G".toUTF8)) +#eval IO.println (repr (EncodedSegment.ofByteArray? "%2G".toUTF8)) /-- info: none -/ #guard_msgs in -#eval IO.println (repr (EncodedString.ofByteArray? "%G2".toUTF8)) +#eval IO.println (repr (EncodedSegment.ofByteArray? "%G2".toUTF8)) -- ============================================================================ -- Percent Encoding Decode Tests @@ -126,37 +126,37 @@ info: none info: some "abc" -/ #guard_msgs in -#eval IO.println (repr <| EncodedString.decode =<< (EncodedString.ofByteArray? "abc".toUTF8)) +#eval IO.println (repr <| EncodedSegment.decode =<< (EncodedSegment.ofByteArray? "abc".toUTF8)) /-- info: some " " -/ #guard_msgs in -#eval IO.println (repr <| EncodedString.decode =<< (EncodedString.ofByteArray? "%20".toUTF8)) +#eval IO.println (repr <| EncodedSegment.decode =<< (EncodedSegment.ofByteArray? "%20".toUTF8)) /-- info: some "hello world" -/ #guard_msgs in -#eval IO.println (repr <| EncodedString.decode =<< (EncodedString.ofByteArray? "hello%20world".toUTF8)) +#eval IO.println (repr <| EncodedSegment.decode =<< (EncodedSegment.ofByteArray? "hello%20world".toUTF8)) /-- info: some " !" -/ #guard_msgs in -#eval IO.println (repr <| EncodedString.decode =<< (EncodedString.ofByteArray? "%20%21".toUTF8)) +#eval IO.println (repr <| EncodedSegment.decode =<< (EncodedSegment.ofByteArray? "%20%21".toUTF8)) /-- info: none -/ #guard_msgs in -#eval IO.println (repr <| EncodedString.decode =<< (EncodedString.ofByteArray? "%FF".toUTF8)) +#eval IO.println (repr <| EncodedSegment.decode =<< (EncodedSegment.ofByteArray? "%FF".toUTF8)) /-- info: some "\x00" -/ #guard_msgs in -#eval IO.println (repr <| EncodedString.decode =<< (EncodedString.ofByteArray? "%00".toUTF8)) +#eval IO.println (repr <| EncodedSegment.decode =<< (EncodedSegment.ofByteArray? "%00".toUTF8)) -- ============================================================================ -- Query String Encoding Tests @@ -166,25 +166,25 @@ info: some "\x00" info: some "hello+world" -/ #guard_msgs in -#eval IO.println (repr (EncodedQueryString.ofByteArray? "hello+world".toUTF8)) +#eval IO.println (repr (EncodedQueryString.ofByteArray? (r := isQueryChar) "hello+world".toUTF8)) /-- info: none -/ #guard_msgs in -#eval IO.println (repr (EncodedQueryString.ofByteArray? "%".toUTF8)) +#eval IO.println (repr (EncodedQueryString.ofByteArray? (r := isQueryChar) "%".toUTF8)) /-- info: some "hello world" -/ #guard_msgs in -#eval IO.println (repr <| EncodedQueryString.decode =<< (EncodedQueryString.ofByteArray? "hello+world".toUTF8)) +#eval IO.println (repr <| EncodedQueryString.decode (r := isQueryChar) =<< (EncodedQueryString.ofByteArray? (r := isQueryChar) "hello+world".toUTF8)) /-- info: some " " -/ #guard_msgs in -#eval IO.println (repr <| EncodedQueryString.decode =<< (EncodedQueryString.ofByteArray? "%20".toUTF8)) +#eval IO.println (repr <| EncodedQueryString.decode (r := isQueryChar) =<< (EncodedQueryString.ofByteArray? (r := isQueryChar) "%20".toUTF8)) -- ============================================================================ -- Request Target Parsing - Basic Tests @@ -547,7 +547,7 @@ info: /x/y info: http://example.com/#section1 -/ #guard_msgs in -#eval IO.println ((URI.parse! "http://example.com/").withFragment (some (toString <| URI.EncodedString.encode "section1"))) +#eval IO.println ((URI.parse! "http://example.com/").withFragment (some (toString (URI.EncodedString.encode "section1" : URI.EncodedFragment)))) /-- info: http://example.com/?key=value From 8c00ba48ae96c54155611ca16cad62f13388eaf0 Mon Sep 17 00:00:00 2001 From: Sofia Rodrigues Date: Fri, 13 Feb 2026 10:12:22 -0300 Subject: [PATCH 12/30] fix: parser --- src/Std/Internal/Http/Data/URI/Parser.lean | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Std/Internal/Http/Data/URI/Parser.lean b/src/Std/Internal/Http/Data/URI/Parser.lean index 5849369d74bf..5f2590f1996c 100644 --- a/src/Std/Internal/Http/Data/URI/Parser.lean +++ b/src/Std/Internal/Http/Data/URI/Parser.lean @@ -272,7 +272,7 @@ def parsePath (forceAbsolute : Bool) (allowEmpty : Bool) : Parser URI.Path := do else acc.push val - let some segmentStr := URI.EncodedSegment.fromByteArray? segmentBytes + let some segmentStr := URI.EncodedSegment.ofByteArray? segmentBytes | fail "invalid percent encoding in path segment" segments := segments.push segmentStr From b042b8efbd410248a6e7205cd2c40224f802b701 Mon Sep 17 00:00:00 2001 From: Sofia Rodrigues Date: Fri, 13 Feb 2026 10:13:00 -0300 Subject: [PATCH 13/30] fix: parser path --- src/Std/Internal/Http/Data/URI/Parser.lean | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/src/Std/Internal/Http/Data/URI/Parser.lean b/src/Std/Internal/Http/Data/URI/Parser.lean index 5f2590f1996c..67fb590dbcdb 100644 --- a/src/Std/Internal/Http/Data/URI/Parser.lean +++ b/src/Std/Internal/Http/Data/URI/Parser.lean @@ -263,16 +263,7 @@ def parsePath (forceAbsolute : Bool) (allowEmpty : Bool) : Parser URI.Path := do -- Parse segments while (← peek?).isSome do let segmentBytes ← parseSegment - - let segmentBytes := segmentBytes.toByteArray.foldl (init := ByteArray.empty) fun acc val => - if isSubDelims val ∨ val = ':'.toUInt8 ∨ val = '@'.toUInt8 then - acc.push '%'.toUInt8 - |>.push (hexDigit (val >>> 4)) - |>.push (hexDigit (val &&& 0xF)) - else - acc.push val - - let some segmentStr := URI.EncodedSegment.ofByteArray? segmentBytes + let some segmentStr := URI.EncodedSegment.ofByteArray? segmentBytes.toByteArray | fail "invalid percent encoding in path segment" segments := segments.push segmentStr From c4737fb66a372f1ff343fbb2d8f32ba2b2275f87 Mon Sep 17 00:00:00 2001 From: Sofia Rodrigues Date: Fri, 13 Feb 2026 11:52:53 -0300 Subject: [PATCH 14/30] fix: parse ab-empty --- src/Std/Internal/Http/Data/URI/Basic.lean | 2 +- src/Std/Internal/Http/Data/URI/Parser.lean | 8 ++++---- tests/lean/run/async_http_uri.lean | 10 ++++++++++ 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/src/Std/Internal/Http/Data/URI/Basic.lean b/src/Std/Internal/Http/Data/URI/Basic.lean index 1f7528e87fb2..7ceb1b23996c 100644 --- a/src/Std/Internal/Http/Data/URI/Basic.lean +++ b/src/Std/Internal/Http/Data/URI/Basic.lean @@ -354,7 +354,7 @@ Returns `none` if the key is not found or if the value cannot be decoded as UTF- def get (query : Query) (key : String) : Option String := match query.find? key with | none => none - | some none => some "" -- Key exists but has Pno value + | some none => some "" -- Key exists but has no value | some (some encoded) => encoded.decode /-- diff --git a/src/Std/Internal/Http/Data/URI/Parser.lean b/src/Std/Internal/Http/Data/URI/Parser.lean index 67fb590dbcdb..2a9e799e0f30 100644 --- a/src/Std/Internal/Http/Data/URI/Parser.lean +++ b/src/Std/Internal/Http/Data/URI/Parser.lean @@ -113,8 +113,10 @@ private def parsePctEncoded : Parser UInt8 := do -- scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) private def parseScheme : Parser URI.Scheme := do - let schemeBytes ← takeWhileUpTo1 isSchemeChar 63 - return ⟨String.fromUTF8! schemeBytes.toByteArray |>.toLower, .isLowerCase_toLower⟩ + let first ← takeWhileUpTo1 isAlpha 1 + let rest ← takeWhileUpTo isSchemeChar 62 + let schemeBytes := first.toByteArray ++ rest.toByteArray + return ⟨String.fromUTF8! schemeBytes |>.toLower, .isLowerCase_toLower⟩ -- port = *DIGIT private def parsePortNumber : Parser UInt16 := do @@ -250,8 +252,6 @@ def parsePath (forceAbsolute : Bool) (allowEmpty : Bool) : Parser URI.Path := do if ← peekIs (· == '/'.toUInt8) then isAbsolute := true skip - if ← peekIs (· == '/'.toUInt8) then - fail "it's a scheme starter" else if forceAbsolute then if allowEmpty ∧ ((← isEof) ∨ ¬isSegmentOrSlash) then return { segments := segments, absolute := isAbsolute } diff --git a/tests/lean/run/async_http_uri.lean b/tests/lean/run/async_http_uri.lean index 7e115e03e73f..a301925fa907 100644 --- a/tests/lean/run/async_http_uri.lean +++ b/tests/lean/run/async_http_uri.lean @@ -190,6 +190,7 @@ info: some " " -- Request Target Parsing - Basic Tests -- ============================================================================ +#eval parseCheck "///path/with/encoded%20space" #eval parseCheck "/path/with/encoded%20space" #eval parseCheck "/path/with/encoded%20space/" #eval parseCheck "*" @@ -199,6 +200,7 @@ info: some " " #eval parseCheck "/files/../etc/passwd" #eval parseCheck "example.com:8080" #eval parseCheck "https://example.com:8080/ata" +#eval parseCheck "https://example.com:8080////./ata" #eval parseCheck "192.168.1.1:3000" #eval parseCheck "[::1]:8080" #eval parseCheck "http://example.com/path/to/resource?query=value" @@ -234,6 +236,14 @@ info: Std.Http.RequestTarget.originForm { segments := #["path", "with", "encoded let result ← runParser parseRequestTarget "/path/with/encoded%20space" IO.println (repr result) +/-- +info: Std.Http.RequestTarget.originForm { segments := #["", "", "path", "with", "encoded%20space"], absolute := true } none +-/ +#guard_msgs in +#eval show IO _ from do + let result ← runParser parseRequestTarget "///path/with/encoded%20space" + IO.println (repr result) + /-- info: Std.Http.RequestTarget.asteriskForm -/ From 1c564ed5f7ee6e8284f12ffe6e08d725e0645123 Mon Sep 17 00:00:00 2001 From: Sofia Rodrigues Date: Fri, 13 Feb 2026 11:56:44 -0300 Subject: [PATCH 15/30] fix: comment --- src/Std/Internal/Http/Data/URI/Basic.lean | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/Std/Internal/Http/Data/URI/Basic.lean b/src/Std/Internal/Http/Data/URI/Basic.lean index 7ceb1b23996c..391c1dfb4239 100644 --- a/src/Std/Internal/Http/Data/URI/Basic.lean +++ b/src/Std/Internal/Http/Data/URI/Basic.lean @@ -680,8 +680,7 @@ Reference: https://www.rfc-editor.org/rfc/rfc7230.html#section-5.3 -/ inductive RequestTarget where /-- - Origin-form request target (most common for HTTP requests). Consists of a path, optional query string, - and optional fragment. + Origin-form request target (most common for HTTP requests). Consists of a path and an optional query string. Example: `/path/to/resource?key=value#section` -/ | originForm (path : URI.Path) (query : Option URI.Query) From 10a66e9f9af513a7c428b4860198dfe7ad33dc3b Mon Sep 17 00:00:00 2001 From: Sofia Rodrigues Date: Wed, 18 Feb 2026 10:35:04 -0300 Subject: [PATCH 16/30] fix: uri duplication checkers --- src/Std/Internal/Http/Data/URI/Basic.lean | 4 +- src/Std/Internal/Http/Data/URI/Encoding.lean | 89 +++----------------- tests/lean/run/async_http_uri.lean | 8 +- 3 files changed, 16 insertions(+), 85 deletions(-) diff --git a/src/Std/Internal/Http/Data/URI/Basic.lean b/src/Std/Internal/Http/Data/URI/Basic.lean index 391c1dfb4239..0c8aef5bc03e 100644 --- a/src/Std/Internal/Http/Data/URI/Basic.lean +++ b/src/Std/Internal/Http/Data/URI/Basic.lean @@ -27,7 +27,7 @@ namespace Std.Http set_option linter.all true -open Internal +open Internal Char namespace URI @@ -681,7 +681,7 @@ Reference: https://www.rfc-editor.org/rfc/rfc7230.html#section-5.3 inductive RequestTarget where /-- Origin-form request target (most common for HTTP requests). Consists of a path and an optional query string. - Example: `/path/to/resource?key=value#section` + Example: `/path/to/resource?key=value` -/ | originForm (path : URI.Path) (query : Option URI.Query) diff --git a/src/Std/Internal/Http/Data/URI/Encoding.lean b/src/Std/Internal/Http/Data/URI/Encoding.lean index bc909e2ef354..d1070e73d834 100644 --- a/src/Std/Internal/Http/Data/URI/Encoding.lean +++ b/src/Std/Internal/Http/Data/URI/Encoding.lean @@ -13,10 +13,12 @@ import Init.Data.UInt.Lemmas import Init.Data.UInt.Bitwise import Init.Data.Array.Lemmas public import Init.Data.String +public import Std.Internal.Http.Internal.Char public section namespace Std.Http.URI +open Internal Char set_option linter.all true @@ -28,77 +30,6 @@ character validation, encoding/decoding functions, and types that maintain encod Lean's dependent type system. -/ -/-- -Checks if a byte represents an ASCII character (value < 128). --/ -def isAscii (c : UInt8) : Bool := - c < 128 - -/-- -Checks if a byte is a hexadecimal digit (0-9, a-f, or A-F). Note: This accepts both lowercase and -uppercase hex digits. --/ -def isHexDigit (c : UInt8) : Bool := - (c ≥ '0'.toUInt8 && c ≤ '9'.toUInt8) || - (c ≥ 'a'.toUInt8 && c ≤ 'f'.toUInt8) || - (c ≥ 'A'.toUInt8 && c ≤ 'F'.toUInt8) - -/-- -Checks if a byte is an alphanumeric digit (0-9, a-z, or A-Z). --/ -def isAlphaNum (c : UInt8) : Bool := - (c ≥ '0'.toUInt8 && c ≤ '9'.toUInt8) || - (c ≥ 'a'.toUInt8 && c ≤ 'z'.toUInt8) || - (c ≥ 'A'.toUInt8 && c ≤ 'Z'.toUInt8) - -/-- -Checks if a byte is an unreserved character according to RFC 3986. Unreserved characters are: -alphanumeric, hyphen, period, underscore, and tilde. --/ -def isUnreserved (c : UInt8) : Bool := - isAlphaNum c || - (c = '-'.toUInt8 || c = '.'.toUInt8 || c = '_'.toUInt8 || c = '~'.toUInt8) - -/-- -Checks if a byte is a sub-delimiter character according to RFC 3986. -Sub-delimiters are: `!`, `$`, `&`, `'`, `(`, `)`, `*`, `+`, `,`, `;`, `=`. --/ -def isSubDelims (c : UInt8) : Bool := - c = '!'.toUInt8 || c = '$'.toUInt8 || c = '&'.toUInt8 || c = '\''.toUInt8 || - c = '('.toUInt8 || c = ')'.toUInt8 || c = '*'.toUInt8 || c = '+'.toUInt8 || - c = ','.toUInt8 || c = ';'.toUInt8 || c = '='.toUInt8 - -/-- -Checks if a byte is a valid path character (`pchar`) according to RFC 3986. -`pchar = unreserved / pct-encoded / sub-delims / ":" / "@"` - -Note: The percent-encoding (`pct-encoded`) is handled separately by `isEncodedChar`, -so this predicate only covers the non-percent characters. --/ -def isPChar (c : UInt8) : Bool := - isUnreserved c || isSubDelims c || c = ':'.toUInt8 || c = '@'.toUInt8 - -/-- -Checks if a byte is a valid character in a URI query component according to RFC 3986. -`query = *( pchar / "/" / "?" )` --/ -def isQueryChar (c : UInt8) : Bool := - isPChar c || c = '/'.toUInt8 || c = '?'.toUInt8 - -/-- -Checks if a byte is a valid character in a URI fragment component according to RFC 3986. -`fragment = *( pchar / "/" / "?" )` --/ -def isFragmentChar (c : UInt8) : Bool := - isPChar c || c = '/'.toUInt8 || c = '?'.toUInt8 - -/-- -Checks if a byte is a valid character in a URI userinfo component according to RFC 3986. -`userinfo = *( unreserved / pct-encoded / sub-delims / ":" )` --/ -def isUserInfoChar (c : UInt8) : Bool := - isUnreserved c || isSubDelims c || c = ':'.toUInt8 - /-- Checks if a byte is a valid character in a percent-encoded URI component. Valid characters are unreserved characters or the percent sign (for escape sequences). @@ -490,7 +421,7 @@ private def push (s : EncodedQueryString r) (c : UInt8) (h : isEncodedQueryChar Attempts to create an `EncodedQueryString` from a `ByteArray`. Returns `some` if the byte array contains only valid encoded query characters and all percent signs are followed by exactly two hex digits, `none` otherwise. -/ -def ofByteArray? (ba : ByteArray) : Option (EncodedQueryString r) := +def ofByteArray? (ba : ByteArray) (r : UInt8 → Bool := isQueryChar) : Option (EncodedQueryString r) := if h : isAllowedEncodedQueryChars r ba then if isValidPercentEncoding ba then some ⟨ba, h⟩ else none else none @@ -498,8 +429,8 @@ def ofByteArray? (ba : ByteArray) : Option (EncodedQueryString r) := /-- Creates an `EncodedQueryString` from a `ByteArray`, panicking if the byte array is invalid. -/ -def ofByteArray! (ba : ByteArray) : EncodedQueryString r := - match ofByteArray? ba with +def ofByteArray! (ba : ByteArray) (r : UInt8 → Bool := isQueryChar) : EncodedQueryString r := + match ofByteArray? ba r with | some es => es | none => panic! "invalid encoded query string" @@ -507,14 +438,14 @@ def ofByteArray! (ba : ByteArray) : EncodedQueryString r := Creates an `EncodedQueryString` from a `String` by checking if it's already a valid percent-encoded string. Returns `some` if valid, `none` otherwise. -/ -def ofString? (s : String) : Option (EncodedQueryString r) := - ofByteArray? s.toUTF8 +def ofString? (s : String) (r : UInt8 → Bool := isQueryChar) : Option (EncodedQueryString r) := + ofByteArray? s.toUTF8 r /-- Creates an `EncodedQueryString` from a `String`, panicking if the string is not a valid percent-encoded string. -/ -def ofString! (s : String) : EncodedQueryString r := - ofByteArray! s.toUTF8 +def ofString! (s : String) (r : UInt8 → Bool := isQueryChar) : EncodedQueryString r := + ofByteArray! s.toUTF8 r /-- Creates an `EncodedQueryString` from a `ByteArray` with compile-time proofs. @@ -545,7 +476,7 @@ private def byteToHex (b : UInt8) (s : EncodedQueryString r) : EncodedQueryStrin Encodes a raw string into an `EncodedQueryString` with automatic proof construction. Unreserved characters are kept as-is, spaces are encoded as '+', and all other characters are percent-encoded. -/ -def encode {r} (s : String) : EncodedQueryString r := +def encode (s : String) (r : UInt8 → Bool := isQueryChar) : EncodedQueryString r := s.toUTF8.foldl (init := EncodedQueryString.empty) fun acc c => if h : isAscii c ∧ r c then acc.push c (by simp [isEncodedQueryChar, isEncodedChar]; exact Or.inl (And.intro h.left (Or.inl h.right))) diff --git a/tests/lean/run/async_http_uri.lean b/tests/lean/run/async_http_uri.lean index a301925fa907..00f48a750f84 100644 --- a/tests/lean/run/async_http_uri.lean +++ b/tests/lean/run/async_http_uri.lean @@ -166,25 +166,25 @@ info: some "\x00" info: some "hello+world" -/ #guard_msgs in -#eval IO.println (repr (EncodedQueryString.ofByteArray? (r := isQueryChar) "hello+world".toUTF8)) +#eval IO.println (repr (EncodedQueryString.ofByteArray? "hello+world".toUTF8)) /-- info: none -/ #guard_msgs in -#eval IO.println (repr (EncodedQueryString.ofByteArray? (r := isQueryChar) "%".toUTF8)) +#eval IO.println (repr (EncodedQueryString.ofByteArray? "%".toUTF8)) /-- info: some "hello world" -/ #guard_msgs in -#eval IO.println (repr <| EncodedQueryString.decode (r := isQueryChar) =<< (EncodedQueryString.ofByteArray? (r := isQueryChar) "hello+world".toUTF8)) +#eval IO.println (repr <| EncodedQueryString.decode =<< (EncodedQueryString.ofByteArray? "hello+world".toUTF8)) /-- info: some " " -/ #guard_msgs in -#eval IO.println (repr <| EncodedQueryString.decode (r := isQueryChar) =<< (EncodedQueryString.ofByteArray? (r := isQueryChar) "%20".toUTF8)) +#eval IO.println (repr <| EncodedQueryString.decode =<< (EncodedQueryString.ofByteArray? "%20".toUTF8)) -- ============================================================================ -- Request Target Parsing - Basic Tests From 37c7b1e22cb83fc63e3c7d939930bf7264bd3853 Mon Sep 17 00:00:00 2001 From: Sofia Rodrigues Date: Wed, 18 Feb 2026 11:03:35 -0300 Subject: [PATCH 17/30] feat: reuse char --- src/Std/Internal/Http/Data/URI/Parser.lean | 104 +++++++-------------- 1 file changed, 32 insertions(+), 72 deletions(-) diff --git a/src/Std/Internal/Http/Data/URI/Parser.lean b/src/Std/Internal/Http/Data/URI/Parser.lean index 2a9e799e0f30..b5049bb89a28 100644 --- a/src/Std/Internal/Http/Data/URI/Parser.lean +++ b/src/Std/Internal/Http/Data/URI/Parser.lean @@ -23,69 +23,12 @@ It handles parsing of schemes, authorities, paths, queries, and fragments. namespace Std.Http.URI.Parser +open Internal Char + set_option linter.all true open Std Internal Parsec ByteArray -@[inline] -private def isDigit (c : UInt8) : Bool := - c >= '0'.toUInt8 ∧ c <= '9'.toUInt8 - -@[inline] -private def isHexDigit (c : UInt8) : Bool := - isDigit c ∨ (c >= 'A'.toUInt8 ∧ c <= 'F'.toUInt8) ∨ (c >= 'a'.toUInt8 ∧ c <= 'f'.toUInt8) - -@[inline] -private def isAlpha (c : UInt8) : Bool := - (c >= 'A'.toUInt8 ∧ c <= 'Z'.toUInt8) ∨ (c >= 'a'.toUInt8 ∧ c <= 'z'.toUInt8) - -@[inline] -private def isAlphaNum (c : UInt8) : Bool := - isAlpha c ∨ isDigit c - -@[inline] -private def isUnreserved (c : UInt8) : Bool := - isAlphaNum c ∨ c = '-'.toUInt8 ∨ c = '.'.toUInt8 ∨ c = '_'.toUInt8 ∨ c = '~'.toUInt8 - -@[inline] -private def isSubDelims (c : UInt8) : Bool := - c = '!'.toUInt8 ∨ c = '$'.toUInt8 ∨ c = '&'.toUInt8 ∨ c = '\''.toUInt8 ∨ - c = '('.toUInt8 ∨ c = ')'.toUInt8 ∨ c = '*'.toUInt8 ∨ c = '+'.toUInt8 ∨ - c = ','.toUInt8 ∨ c = ';'.toUInt8 ∨ c = '='.toUInt8 - -@[inline] -private def isGenDelims (c : UInt8) : Bool := - c = ':'.toUInt8 ∨ c = '/'.toUInt8 ∨ c = '?'.toUInt8 ∨ c = '#'.toUInt8 ∨ - c = '['.toUInt8 ∨ c = ']'.toUInt8 ∨ c = '@'.toUInt8 - -@[inline] -private def isReserved (c : UInt8) : Bool := - isGenDelims c ∨ isSubDelims c - -@[inline] -private def isPChar (c : UInt8) : Bool := - isUnreserved c ∨ isSubDelims c ∨ c = ':'.toUInt8 ∨ c = '@'.toUInt8 ∨ c = '%'.toUInt8 - -@[inline] -private def isRegNameChar (c : UInt8) : Bool := - isUnreserved c ∨ isSubDelims c ∨ c = '%'.toUInt8 - -@[inline] -private def isSchemeChar (c : UInt8) : Bool := - isAlphaNum c ∨ c = '+'.toUInt8 ∨ c = '-'.toUInt8 ∨ c = '.'.toUInt8 - -@[inline] -private def isQueryChar (c : UInt8) : Bool := - isPChar c ∨ c = '/'.toUInt8 ∨ c = '?'.toUInt8 - -@[inline] -private def isFragmentChar (c : UInt8) : Bool := - isPChar c ∨ c = '/'.toUInt8 ∨ c = '?'.toUInt8 - -@[inline] -private def isUserInfoChar (c : UInt8) : Bool := - isUnreserved c ∨ isSubDelims c ∨ c = '%'.toUInt8 ∨ c = ':'.toUInt8 - @[inline] private def tryOpt (p : Parser α) : Parser (Option α) := optional (attempt p) @@ -114,7 +57,11 @@ private def parsePctEncoded : Parser UInt8 := do -- scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) private def parseScheme : Parser URI.Scheme := do let first ← takeWhileUpTo1 isAlpha 1 - let rest ← takeWhileUpTo isSchemeChar 62 + let rest ← takeWhileUpTo + (fun c => + isAlphaNum c ∨ + c = '+'.toUInt8 ∨ c = '-'.toUInt8 ∨ c = '.'.toUInt8) + 62 let schemeBytes := first.toByteArray ++ rest.toByteArray return ⟨String.fromUTF8! schemeBytes |>.toLower, .isLowerCase_toLower⟩ @@ -134,7 +81,11 @@ private def parsePortNumber : Parser UInt16 := do -- userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) private def parseUserInfo : Parser URI.UserInfo := do - let userBytesName ← takeWhileUpTo (fun x => x ≠ ':'.toUInt8 ∧ isUserInfoChar x) 1024 + let userBytesName ← takeWhileUpTo + (fun x => + x ≠ ':'.toUInt8 ∧ + (isUserInfoChar x ∨ x = '%'.toUInt8)) + 1024 let some userName := URI.EncodedUserInfo.ofByteArray? userBytesName.toByteArray | fail "invalid percent encoding in user info" @@ -142,7 +93,9 @@ private def parseUserInfo : Parser URI.UserInfo := do let userPass ← if ← peekIs (· == ':'.toUInt8) then skip - let userBytesPass ← takeWhileUpTo isUserInfoChar 1024 + let userBytesPass ← takeWhileUpTo + (fun x => isUserInfoChar x ∨ x = '%'.toUInt8) + 1024 let some userStrPass := URI.EncodedUserInfo.ofByteArray? userBytesPass.toByteArray >>= URI.EncodedUserInfo.decode | fail "invalid percent encoding in user info" @@ -160,7 +113,9 @@ private def parseUserInfo : Parser URI.UserInfo := do private def parseIPv6 : Parser Net.IPv6Addr := do skipByte '['.toUInt8 - let result ← takeWhileUpTo1 (fun x => x = ':'.toUInt8 ∨ isHexDigit x) 256 + let result ← takeWhileUpTo1 + (fun x => x = ':'.toUInt8 ∨ isHexDigit x) + 256 skipByte ']'.toUInt8 @@ -172,7 +127,9 @@ private def parseIPv6 : Parser Net.IPv6Addr := do -- IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet private def parseIPv4 : Parser Net.IPv4Addr := do - let result ← takeWhileUpTo1 (fun x => x = '.'.toUInt8 ∨ isDigit x) 256 + let result ← takeWhileUpTo1 + (fun x => x = '.'.toUInt8 ∨ isDigit x) + 256 let ipv4Str := String.fromUTF8! result.toByteArray let some ipv4Str := Std.Net.IPv4Addr.ofString ipv4Str @@ -187,14 +144,14 @@ private def parseHost : Parser URI.Host := do if (← peekWhen? (· == '['.toUInt8)).isSome then return .ipv6 (← parseIPv6) else - if (← peekWhen? isDigit).isSome then + if (← peekWhen? Internal.Char.isDigit).isSome then if let some ipv4 ← tryOpt parseIPv4 then return .ipv4 ipv4 -- It needs to be a legal DNS label, so it differs from reg-name. - let isHostName x := isAlphaNum x ∨ x = '-'.toUInt8 ∨ x = '.'.toUInt8 - - let some str := String.fromUTF8? (← takeWhileUpTo1 isHostName 1024).toByteArray + let some str := String.fromUTF8? (← takeWhileUpTo1 + (fun x => isAlphaNum x ∨ x = '-'.toUInt8 ∨ x = '.'.toUInt8) + 1024).toByteArray | fail s!"invalid host" let lower := str.toLower @@ -220,7 +177,7 @@ private def parseAuthority : Parser URI.Authority := do -- segment = *pchar private def parseSegment : Parser ByteSlice := do - takeWhileUpTo isPChar 256 + takeWhileUpTo (fun c => isPChar c ∨ c = '%'.toUInt8) 256 /- path = path-abempty ; begins with "/" or is empty @@ -243,7 +200,8 @@ def parsePath (forceAbsolute : Bool) (allowEmpty : Bool) : Parser URI.Path := do let mut isAbsolute := false let mut segments : Array _ := #[] - let isSegmentOrSlash ← peekIs (fun c => isPChar c ∨ c = '/'.toUInt8) + let isSegmentOrSlash ← + peekIs (fun c => isPChar c ∨ c = '%'.toUInt8 ∨ c = '/'.toUInt8) if ¬allowEmpty ∧ ((← isEof) ∨ ¬isSegmentOrSlash) then fail "need a path" @@ -280,7 +238,8 @@ def parsePath (forceAbsolute : Bool) (allowEmpty : Bool) : Parser URI.Path := do -- query = *( pchar / "/" / "?" ) private def parseQuery : Parser URI.Query := do - let queryBytes ← takeWhileUpTo isQueryChar 1024 + let queryBytes ← + takeWhileUpTo (fun c => isQueryChar c ∨ c = '%'.toUInt8) 1024 let some queryStr := String.fromUTF8? queryBytes.toByteArray | fail "invalid query string" @@ -303,7 +262,8 @@ private def parseQuery : Parser URI.Query := do -- fragment = *( pchar / "/" / "?" ) private def parseFragment : Parser URI.EncodedFragment := do - let fragmentBytes ← takeWhileUpTo isFragmentChar 1024 + let fragmentBytes ← + takeWhileUpTo (fun c => isFragmentChar c ∨ c = '%'.toUInt8) 1024 let some fragmentStr := URI.EncodedFragment.ofByteArray? fragmentBytes.toByteArray | fail "invalid percent encoding in fragment" From 319214cfb34ace0eaa3d2ed6dc48f043e5d28f2b Mon Sep 17 00:00:00 2001 From: Sofia Rodrigues Date: Thu, 19 Feb 2026 10:51:32 -0300 Subject: [PATCH 18/30] feat: config URI --- src/Std/Internal/Http/Data/URI/Basic.lean | 2 +- src/Std/Internal/Http/Data/URI/Config.lean | 80 ++++++++++++++++ src/Std/Internal/Http/Data/URI/Encoding.lean | 2 +- src/Std/Internal/Http/Data/URI/Parser.lean | 98 ++++++++++++-------- src/Std/Internal/Http/Internal/Char.lean | 4 + 5 files changed, 144 insertions(+), 42 deletions(-) create mode 100644 src/Std/Internal/Http/Data/URI/Config.lean diff --git a/src/Std/Internal/Http/Data/URI/Basic.lean b/src/Std/Internal/Http/Data/URI/Basic.lean index 0c8aef5bc03e..1da8a425caea 100644 --- a/src/Std/Internal/Http/Data/URI/Basic.lean +++ b/src/Std/Internal/Http/Data/URI/Basic.lean @@ -8,8 +8,8 @@ module prelude import Init.Data.ToString public import Std.Net -public import Std.Internal.Http.Data.URI.Encoding public import Std.Internal.Http.Internal +public import Std.Internal.Http.Data.URI.Encoding public section diff --git a/src/Std/Internal/Http/Data/URI/Config.lean b/src/Std/Internal/Http/Data/URI/Config.lean new file mode 100644 index 000000000000..ef8eff811258 --- /dev/null +++ b/src/Std/Internal/Http/Data/URI/Config.lean @@ -0,0 +1,80 @@ +/- +Copyright (c) 2025 Lean FRO, LLC. All rights reserved. +Released under Apache 2.0 license as described in the file LICENSE. +Authors: Sofia Rodrigues +-/ +module + +prelude +public import Init.Data + +public section + +/-! +# URI Parser Configuration + +This module defines `URI.Config`, which controls per-component size limits used during URI +parsing. All limits default to the values previously hardcoded in the parser, except +`maxQueryLength`, which is raised from 1024 to 8192 to match `H1.Config.maxUriLength` and +accommodate real-world query strings. +-/ + +namespace Std.Http.URI + +set_option linter.all true + +/-- +Per-component size limits for the URI parser. +-/ +structure Config where + /-- + Maximum length of the URI scheme component (e.g. `https`), in bytes. + The scheme grammar requires at least one leading ALPHA; the remaining budget is + `max(0, maxSchemeLength - 1)` additional characters. + -/ + maxSchemeLength : Nat := 13 + + /-- + Maximum length of the host `reg-name` component, in bytes. + -/ + maxHostLength : Nat := 253 + + /-- + Maximum length of the userinfo component (username and password each), in bytes. + -/ + maxUserInfoLength : Nat := 1024 + + /-- + Maximum length of a single path segment, in bytes. + -/ + maxSegmentLength : Nat := 256 + + /-- + Maximum length of the query string, in bytes. + Raised from the previously hardcoded 1024 to 8192 to match `H1.Config.maxUriLength` + and allow real-world query strings. + -/ + maxQueryLength : Nat := 8192 + + /-- + Maximum length of the fragment component, in bytes. + -/ + maxFragmentLength : Nat := 1024 + + /-- + Maximum number of path segments. + Prevents excessive segment counts that could arise from paths like `/a/b/c/…` repeated many times. + -/ + maxPathSegments : Nat := 128 + + /-- + Maximum total byte length of the path (all segments combined, including separating slashes). + -/ + maxTotalPathLength : Nat := 8192 + + /-- + Maximum number of query parameters (key-value pairs separated by `&`). + -/ + maxQueryParams : Nat := 100 + +end Std.Http.URI diff --git a/src/Std/Internal/Http/Data/URI/Encoding.lean b/src/Std/Internal/Http/Data/URI/Encoding.lean index d1070e73d834..773ebdb6824b 100644 --- a/src/Std/Internal/Http/Data/URI/Encoding.lean +++ b/src/Std/Internal/Http/Data/URI/Encoding.lean @@ -35,7 +35,7 @@ Checks if a byte is a valid character in a percent-encoded URI component. Valid unreserved characters or the percent sign (for escape sequences). -/ def isEncodedChar (rule : UInt8 → Bool) (c : UInt8) : Bool := - isAscii c ∧ (rule c ∨ isHexDigit c ∨ c = '%'.toUInt8) + isAscii c ∧ (rule c ∨ isHexDigit c ∨ c = '%'.toUInt8) /-- Checks if a byte is valid in a percent-encoded query string component. Extends `isEncodedChar` to also diff --git a/src/Std/Internal/Http/Data/URI/Parser.lean b/src/Std/Internal/Http/Data/URI/Parser.lean index b5049bb89a28..2307b820797a 100644 --- a/src/Std/Internal/Http/Data/URI/Parser.lean +++ b/src/Std/Internal/Http/Data/URI/Parser.lean @@ -11,6 +11,7 @@ public import Init.Data.String public import Std.Internal.Parsec public import Std.Internal.Parsec.ByteArray public import Std.Internal.Http.Data.URI.Basic +public import Std.Internal.Http.Data.URI.Config public section @@ -55,13 +56,13 @@ private def parsePctEncoded : Parser UInt8 := do return (hi <<< 4) ||| lo -- scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) -private def parseScheme : Parser URI.Scheme := do +private def parseScheme (config : URI.Config) : Parser URI.Scheme := do let first ← takeWhileUpTo1 isAlpha 1 let rest ← takeWhileUpTo (fun c => isAlphaNum c ∨ c = '+'.toUInt8 ∨ c = '-'.toUInt8 ∨ c = '.'.toUInt8) - 62 + (config.maxSchemeLength - 1) let schemeBytes := first.toByteArray ++ rest.toByteArray return ⟨String.fromUTF8! schemeBytes |>.toLower, .isLowerCase_toLower⟩ @@ -72,7 +73,7 @@ private def parsePortNumber : Parser UInt16 := do let portStr := String.fromUTF8! portBytes.toByteArray let some portNum := String.toNat? portStr - | fail s!"invalid port number:{portStr}" + | fail s!"invalid port number: {portStr}" if portNum > 65535 then fail s!"port number too large: {portNum}" @@ -80,12 +81,12 @@ private def parsePortNumber : Parser UInt16 := do return portNum.toUInt16 -- userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) -private def parseUserInfo : Parser URI.UserInfo := do +private def parseUserInfo (config : URI.Config) : Parser URI.UserInfo := do let userBytesName ← takeWhileUpTo (fun x => x ≠ ':'.toUInt8 ∧ (isUserInfoChar x ∨ x = '%'.toUInt8)) - 1024 + config.maxUserInfoLength let some userName := URI.EncodedUserInfo.ofByteArray? userBytesName.toByteArray | fail "invalid percent encoding in user info" @@ -95,7 +96,7 @@ private def parseUserInfo : Parser URI.UserInfo := do let userBytesPass ← takeWhileUpTo (fun x => isUserInfoChar x ∨ x = '%'.toUInt8) - 1024 + config.maxUserInfoLength let some userStrPass := URI.EncodedUserInfo.ofByteArray? userBytesPass.toByteArray >>= URI.EncodedUserInfo.decode | fail "invalid percent encoding in user info" @@ -140,7 +141,7 @@ private def parseIPv4 : Parser Net.IPv4Addr := do -- host = IP-literal / IPv4address / reg-name -- Note: RFC 1123 allows domain labels to start with digits, so we must try IPv4 -- first and fall back to reg-name parsing if it fails. -private def parseHost : Parser URI.Host := do +private def parseHost (config : URI.Config) : Parser URI.Host := do if (← peekWhen? (· == '['.toUInt8)).isSome then return .ipv6 (← parseIPv6) else @@ -151,7 +152,7 @@ private def parseHost : Parser URI.Host := do -- It needs to be a legal DNS label, so it differs from reg-name. let some str := String.fromUTF8? (← takeWhileUpTo1 (fun x => isAlphaNum x ∨ x = '-'.toUInt8 ∨ x = '.'.toUInt8) - 1024).toByteArray + config.maxHostLength).toByteArray | fail s!"invalid host" let lower := str.toLower @@ -161,13 +162,13 @@ private def parseHost : Parser URI.Host := do fail s!"invalid domain name: {str}" -- authority = [ userinfo "@" ] host [ ":" port ] -private def parseAuthority : Parser URI.Authority := do +private def parseAuthority (config : URI.Config) : Parser URI.Authority := do let userinfo ← tryOpt do - let ui ← parseUserInfo + let ui ← parseUserInfo config skipByte '@'.toUInt8 return ui - let host ← parseHost + let host ← parseHost config let port ← optional do skipByte ':'.toUInt8 @@ -176,8 +177,8 @@ private def parseAuthority : Parser URI.Authority := do return { userInfo := userinfo, host := host, port := port } -- segment = *pchar -private def parseSegment : Parser ByteSlice := do - takeWhileUpTo (fun c => isPChar c ∨ c = '%'.toUInt8) 256 +private def parseSegment (config : URI.Config) : Parser ByteSlice := do + takeWhileUpTo (fun c => isPChar c ∨ c = '%'.toUInt8) config.maxSegmentLength /- path = path-abempty ; begins with "/" or is empty @@ -194,11 +195,12 @@ path-empty = 0 -/ /-- -Parses an URI with combined parsing and validation. +Parses a URI path with combined parsing and validation. -/ -def parsePath (forceAbsolute : Bool) (allowEmpty : Bool) : Parser URI.Path := do +def parsePath (config : URI.Config) (forceAbsolute : Bool) (allowEmpty : Bool) : Parser URI.Path := do let mut isAbsolute := false let mut segments : Array _ := #[] + let mut totalLength := 0 let isSegmentOrSlash ← peekIs (fun c => isPChar c ∨ c = '%'.toUInt8 ∨ c = '/'.toUInt8) @@ -209,6 +211,7 @@ def parsePath (forceAbsolute : Bool) (allowEmpty : Bool) : Parser URI.Path := do -- Check if path is absolute if ← peekIs (· == '/'.toUInt8) then isAbsolute := true + totalLength := totalLength + 1 skip else if forceAbsolute then if allowEmpty ∧ ((← isEof) ∨ ¬isSegmentOrSlash) then @@ -220,13 +223,23 @@ def parsePath (forceAbsolute : Bool) (allowEmpty : Bool) : Parser URI.Path := do -- Parse segments while (← peek?).isSome do - let segmentBytes ← parseSegment + if segments.size >= config.maxPathSegments then + fail s!"too many path segments (limit: {config.maxPathSegments})" + + let segmentBytes ← parseSegment config let some segmentStr := URI.EncodedSegment.ofByteArray? segmentBytes.toByteArray | fail "invalid percent encoding in path segment" + totalLength := totalLength + segmentBytes.size + if totalLength > config.maxTotalPathLength then + fail s!"path too long (limit: {config.maxTotalPathLength} bytes)" + segments := segments.push segmentStr if (← peek?).any (· == '/'.toUInt8) then + totalLength := totalLength + 1 + if totalLength > config.maxTotalPathLength then + fail s!"path too long (limit: {config.maxTotalPathLength} bytes)" skip -- If path ends with '/', add empty segment if (← peek?).isNone then @@ -237,14 +250,19 @@ def parsePath (forceAbsolute : Bool) (allowEmpty : Bool) : Parser URI.Path := do return { segments := segments, absolute := isAbsolute } -- query = *( pchar / "/" / "?" ) -private def parseQuery : Parser URI.Query := do +private def parseQuery (config : URI.Config) : Parser URI.Query := do let queryBytes ← - takeWhileUpTo (fun c => isQueryChar c ∨ c = '%'.toUInt8) 1024 + takeWhileUpTo (fun c => isQueryChar c ∨ c = '%'.toUInt8) config.maxQueryLength let some queryStr := String.fromUTF8? queryBytes.toByteArray | fail "invalid query string" - let pairs : Option URI.Query := queryStr.splitOn "&" |>.foldlM (init := URI.Query.empty) fun acc pair => do + let rawPairs := queryStr.splitOn "&" + + if rawPairs.length > config.maxQueryParams then + fail s!"too many query parameters (limit: {config.maxQueryParams})" + + let pairs : Option URI.Query := rawPairs.foldlM (init := URI.Query.empty) fun acc pair => do match pair.splitOn "=" with | [key] => let key ← URI.EncodedQueryParam.fromString? key @@ -261,24 +279,24 @@ private def parseQuery : Parser URI.Query := do fail "invalid query string" -- fragment = *( pchar / "/" / "?" ) -private def parseFragment : Parser URI.EncodedFragment := do +private def parseFragment (config : URI.Config) : Parser URI.EncodedFragment := do let fragmentBytes ← - takeWhileUpTo (fun c => isFragmentChar c ∨ c = '%'.toUInt8) 1024 + takeWhileUpTo (fun c => isFragmentChar c ∨ c = '%'.toUInt8) config.maxFragmentLength let some fragmentStr := URI.EncodedFragment.ofByteArray? fragmentBytes.toByteArray | fail "invalid percent encoding in fragment" return fragmentStr -private def parseHierPart : Parser (Option URI.Authority × URI.Path) := do +private def parseHierPart (config : URI.Config) : Parser (Option URI.Authority × URI.Path) := do -- Check for "//" authority path-abempty if (← tryOpt (skipString "//")).isSome then - let authority ← parseAuthority - let path ← parsePath true true -- path-abempty (must start with "/" or be empty) + let authority ← parseAuthority config + let path ← parsePath config true true -- path-abempty (must start with "/" or be empty) return (some authority, path) else -- path-absolute / path-rootless / path-empty - let path ← parsePath false true + let path ← parsePath config false true return (none, path) /-- @@ -287,17 +305,17 @@ Parses a URI (Uniform Resource Identifier). URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] hier-part = "//" authority path-abempty / path-absolute / path-rootless / path-empty -/ -public def parseURI : Parser URI := do - let scheme ← parseScheme +public def parseURI (config : URI.Config := {}) : Parser URI := do + let scheme ← parseScheme config skipByte ':'.toUInt8 - let (authority, path) ← parseHierPart + let (authority, path) ← parseHierPart config - let query ← optional (skipByteChar '?' *> parseQuery) + let query ← optional (skipByteChar '?' *> parseQuery config) let query := query.getD .empty let fragment ← optional do - let some result := (← (skipByteChar '#' *> parseFragment)) |>.decode + let some result := (← (skipByteChar '#' *> parseFragment config)) |>.decode | fail "invalid fragment parse encoding" return result @@ -306,7 +324,7 @@ public def parseURI : Parser URI := do /-- Parses a request target with combined parsing and validation. -/ -public def parseRequestTarget : Parser RequestTarget := +public def parseRequestTarget (config : URI.Config := {}) : Parser RequestTarget := asterisk <|> origin <|> authority <|> absolute where -- The asterisk form @@ -318,8 +336,8 @@ where -- absolute-path = 1*( "/" segment ) origin : Parser RequestTarget := attempt do if ← peekIs (· == '/'.toUInt8) then - let path ← parsePath true true - let query ← optional (skipByte '?'.toUInt8 *> parseQuery) + let path ← parsePath config true true + let query ← optional (skipByte '?'.toUInt8 *> parseQuery config) return .originForm path query else @@ -327,17 +345,17 @@ where -- absolute-URI = scheme ":" hier-part [ "?" query ] absolute : Parser RequestTarget := attempt do - let scheme ← parseScheme + let scheme ← parseScheme config skipByte ':'.toUInt8 - let (authority, path) ← parseHierPart - let query ← optional (skipByteChar '?' *> parseQuery) + let (auth, path) ← parseHierPart config + let query ← optional (skipByteChar '?' *> parseQuery config) let query := query.getD URI.Query.empty - return .absoluteForm { path, scheme, authority, query, fragment := none } (by simp) + return .absoluteForm { path, scheme, authority := auth, query, fragment := none } (by simp) -- authority-form = host ":" port authority : Parser RequestTarget := attempt do - let host ← parseHost + let host ← parseHost config skipByteChar ':' let port ← parsePortNumber return .authorityForm { host, port := some port } @@ -345,8 +363,8 @@ where /-- Parses an HTTP `Host` header value. -/ -public def parseHostHeader : Parser (URI.Host × Option UInt16) := do - let host ← parseHost +public def parseHostHeader (config : URI.Config := {}) : Parser (URI.Host × Option UInt16) := do + let host ← parseHost config let port ← optional do skipByte ':'.toUInt8 diff --git a/src/Std/Internal/Http/Internal/Char.lean b/src/Std/Internal/Http/Internal/Char.lean index 2b2fed63a137..76180579c2d6 100644 --- a/src/Std/Internal/Http/Internal/Char.lean +++ b/src/Std/Internal/Http/Internal/Char.lean @@ -25,6 +25,10 @@ set_option linter.all true /-- Checks if a character is a valid HTTP token character per RFC 9110 §5.6.2. Token characters include alphanumerics and the following: `` !#$%&'*+-.^_`|~ `` + +The bitmask `0x57ffffffc7fffffe03ff6cfa00000000` encodes exactly the 128-bit set of +allowed ASCII token characters: bit `i` is set iff ASCII code point `i` is a token +character. `Nat.testBit` then performs an O(1) membership test. -/ @[expose] def isTokenCharacter (c : Char) : Bool := From 462e3d02ddd2fcd53aa4c4a52e704dd8e027aefb Mon Sep 17 00:00:00 2001 From: Sofia Rodrigues Date: Fri, 20 Feb 2026 12:01:21 -0300 Subject: [PATCH 19/30] fix: import coe --- src/Std/Internal/Http/Data/URI/Config.lean | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Std/Internal/Http/Data/URI/Config.lean b/src/Std/Internal/Http/Data/URI/Config.lean index ef8eff811258..19cc51805482 100644 --- a/src/Std/Internal/Http/Data/URI/Config.lean +++ b/src/Std/Internal/Http/Data/URI/Config.lean @@ -6,7 +6,7 @@ Authors: Sofia Rodrigues module prelude -public import Init.Data +public import Init.Data.Nat public section From e0efb8aec9bc41c280a75e76097c8f8c8fef0ed9 Mon Sep 17 00:00:00 2001 From: Sofia Rodrigues Date: Fri, 20 Feb 2026 16:59:24 -0300 Subject: [PATCH 20/30] fix: scheme parser --- src/Std/Internal/Http/Data/URI/Basic.lean | 66 +++++++++++++++++----- src/Std/Internal/Http/Data/URI/Parser.lean | 9 ++- tests/lean/run/async_http_uri.lean | 6 +- 3 files changed, 63 insertions(+), 18 deletions(-) diff --git a/src/Std/Internal/Http/Data/URI/Basic.lean b/src/Std/Internal/Http/Data/URI/Basic.lean index 1da8a425caea..df036c2b559a 100644 --- a/src/Std/Internal/Http/Data/URI/Basic.lean +++ b/src/Std/Internal/Http/Data/URI/Basic.lean @@ -31,13 +31,21 @@ open Internal Char namespace URI + +/-- +Checks if a character is valid after the first character of a URI scheme. +Valid characters are ASCII alphanumeric, `+`, `-`, and `.`. +-/ +def isValidSchemeChar (c : Char) : Bool := + c.isAlphanum || c == '+' || c == '-' || c == '.' + /-- URI scheme identifier (e.g., "http", "https", "ftp"). -/ -abbrev Scheme := { s : String // IsLowerCase s } +abbrev Scheme := { s : String // IsLowerCase s ∧ s.toList.all isValidSchemeChar ∧ ¬s.isEmpty } instance : Inhabited Scheme where - default := ⟨"", .isLowerCase_empty⟩ + default := ⟨"a", ⟨by decide, by decide, by decide⟩⟩ /-- User information component containing the username and optional password. Both fields store decoded @@ -73,7 +81,7 @@ A domain name represented as a validated, lowercase-normalized string. Only ASCII alphanumeric characters, hyphens, and dots are allowed. Internationalized domain names must be converted to punycode before use. -/ -abbrev DomainName := { s : String // IsLowerCase s ∧ IsValidDomainName s } +abbrev DomainName := { s : String // IsLowerCase s ∧ IsValidDomainName s ∧ ¬s.isEmpty } /-- Host component of a URI, supporting domain names and IP addresses. @@ -494,10 +502,22 @@ Creates an empty URI builder. def empty : Builder := {} /-- -Sets the URI scheme (e.g., "http", "https"). +Attempts to set the URI scheme (e.g., "http", "https"). +Returns `none` if the scheme is not a valid RFC 3986 scheme. +A scheme must start with an ASCII letter followed by alphanumeric, `+`, `-`, or `.` characters. +-/ +def setScheme? (b : Builder) (scheme : String) : Option Builder := + if IsLowerCase scheme ∧ scheme.toList.all isValidSchemeChar ∧ scheme.isEmpty then some { b with scheme := some scheme.toLower } + else none + +/-- +Sets the URI scheme (e.g., "http", "https"). Panics if the scheme is invalid. +Use `setScheme?` if you need a safe option-returning version. -/ -def setScheme (b : Builder) (scheme : String) : Builder := - { b with scheme := some scheme } +def setScheme! (b : Builder) (scheme : String) : Builder := + match b.setScheme? scheme with + | some b => b + | none => panic! s!"invalid URI scheme: {scheme.quote}" /-- Sets the user information with username and optional password. @@ -518,8 +538,10 @@ Internationalized domain names must be converted to punycode before use. -/ def setHost? (b : Builder) (name : String) : Option Builder := let lower := name.toLower - if h : IsValidDomainName lower then - some { b with host := some (Host.name ⟨lower, IsLowerCase.isLowerCase_toLower, h⟩) } + if h₁: lower.isEmpty then + none + else if h : IsValidDomainName lower then + some { b with host := some (Host.name ⟨lower, IsLowerCase.isLowerCase_toLower, h, h₁⟩) } else none @@ -595,7 +617,12 @@ Builds a complete URI from the builder state, encoding all components. Defaults none is specified. -/ def build (b : Builder) : URI := - let scheme := b.scheme.getD "https" + let schemeStr := (b.scheme.getD "https").toLower + let scheme : Scheme := + if h : IsLowerCase schemeStr ∧ schemeStr.toList.all isValidSchemeChar ∧ ¬schemeStr.isEmpty then + ⟨schemeStr, h.1, h.2.1, h.2.2⟩ + else + default let authority := if b.host.isSome then @@ -618,7 +645,7 @@ def build (b : Builder) : URI := let query := URI.Query.ofList query.toList { - scheme := ⟨scheme.toLower, IsLowerCase.isLowerCase_toLower⟩ + scheme authority := authority path query := query @@ -634,10 +661,23 @@ namespace URI /-- Returns a new URI with the scheme replaced. -/ -def withScheme (uri : URI) (scheme : String) : URI := - { uri with scheme := ⟨scheme.toLower, IsLowerCase.isLowerCase_toLower⟩ } +def withScheme! (uri : URI) (scheme : String) : URI := + let lower := scheme.toLower + if h : IsLowerCase lower ∧ lower.toList.all isValidSchemeChar ∧ ¬lower.isEmpty then + { uri with scheme := ⟨lower, h.1, h.2.1, h.2.2⟩ } + else + panic! s!"invalid URI scheme: {scheme.quote}" -/-- +/--/-- +Returns a new URI with the scheme replaced. +-/ +def withScheme! (uri : URI) (scheme : String) : URI := + let lower := scheme.toLower + if h : IsLowerCase lower ∧ lower.toList.all isValidSchemeChar ∧ ¬lower.isEmpty then + { uri with scheme := ⟨lower, h.1, h.2.1, h.2.2⟩ } + else + panic! s!"invalid URI scheme: {scheme.quote}" +z Returns a new URI with the authority replaced. -/ def withAuthority (uri : URI) (authority : Option URI.Authority) : URI := diff --git a/src/Std/Internal/Http/Data/URI/Parser.lean b/src/Std/Internal/Http/Data/URI/Parser.lean index 2307b820797a..7c56fdca2111 100644 --- a/src/Std/Internal/Http/Data/URI/Parser.lean +++ b/src/Std/Internal/Http/Data/URI/Parser.lean @@ -64,7 +64,12 @@ private def parseScheme (config : URI.Config) : Parser URI.Scheme := do c = '+'.toUInt8 ∨ c = '-'.toUInt8 ∨ c = '.'.toUInt8) (config.maxSchemeLength - 1) let schemeBytes := first.toByteArray ++ rest.toByteArray - return ⟨String.fromUTF8! schemeBytes |>.toLower, .isLowerCase_toLower⟩ + let str := String.fromUTF8! schemeBytes |>.toLower + + if h: str.toList.all isValidSchemeChar ∧ ¬str.isEmpty then + return ⟨str, ⟨.isLowerCase_toLower, h⟩⟩ + else + fail "invalid scheme" -- port = *DIGIT private def parsePortNumber : Parser UInt16 := do @@ -156,7 +161,7 @@ private def parseHost (config : URI.Config) : Parser URI.Host := do | fail s!"invalid host" let lower := str.toLower - if h : URI.IsValidDomainName lower then + if h : URI.IsValidDomainName lower ∧ ¬lower.isEmpty then return .name ⟨lower, .isLowerCase_toLower, h⟩ else fail s!"invalid domain name: {str}" diff --git a/tests/lean/run/async_http_uri.lean b/tests/lean/run/async_http_uri.lean index 00f48a750f84..3091c47c3018 100644 --- a/tests/lean/run/async_http_uri.lean +++ b/tests/lean/run/async_http_uri.lean @@ -726,7 +726,7 @@ info: https://example.com/api/users?page=1 #guard_msgs in #eval do let uri := URI.Builder.empty - |>.setScheme "https" + |>.setScheme! "https" |>.setHost! "example.com" |>.appendPathSegment "api" |>.appendPathSegment "users" @@ -740,7 +740,7 @@ info: http://localhost:8080/ #guard_msgs in #eval do let uri := URI.Builder.empty - |>.setScheme "http" + |>.setScheme! "http" |>.setHost! "localhost" |>.setPort 8080 |>.build @@ -752,7 +752,7 @@ info: https://user:pass@secure.example.com/private #guard_msgs in #eval do let uri := URI.Builder.empty - |>.setScheme "https" + |>.setScheme! "https" |>.setUserInfo "user" (some "pass") |>.setHost! "secure.example.com" |>.appendPathSegment "private" From bfa18ef30c792fa6dae0e875ab64399eeaf59bff Mon Sep 17 00:00:00 2001 From: Sofia Rodrigues Date: Fri, 20 Feb 2026 17:11:05 -0300 Subject: [PATCH 21/30] fix: uri builder --- src/Std/Internal/Http/Data/URI/Basic.lean | 11 +++-------- tests/lean/run/async_http_uri.lean | 4 ++-- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/src/Std/Internal/Http/Data/URI/Basic.lean b/src/Std/Internal/Http/Data/URI/Basic.lean index df036c2b559a..004e18c35dce 100644 --- a/src/Std/Internal/Http/Data/URI/Basic.lean +++ b/src/Std/Internal/Http/Data/URI/Basic.lean @@ -461,7 +461,7 @@ structure Builder where /-- The URI scheme (e.g., "http", "https"). -/ - scheme : Option String := none + scheme : Option URI.Scheme := none /-- User information (username and optional password). @@ -507,7 +507,7 @@ Returns `none` if the scheme is not a valid RFC 3986 scheme. A scheme must start with an ASCII letter followed by alphanumeric, `+`, `-`, or `.` characters. -/ def setScheme? (b : Builder) (scheme : String) : Option Builder := - if IsLowerCase scheme ∧ scheme.toList.all isValidSchemeChar ∧ scheme.isEmpty then some { b with scheme := some scheme.toLower } + if h : IsLowerCase scheme ∧ scheme.toList.all isValidSchemeChar ∧ ¬scheme.isEmpty then some { b with scheme := some ⟨scheme, h⟩ } else none /-- @@ -617,12 +617,7 @@ Builds a complete URI from the builder state, encoding all components. Defaults none is specified. -/ def build (b : Builder) : URI := - let schemeStr := (b.scheme.getD "https").toLower - let scheme : Scheme := - if h : IsLowerCase schemeStr ∧ schemeStr.toList.all isValidSchemeChar ∧ ¬schemeStr.isEmpty then - ⟨schemeStr, h.1, h.2.1, h.2.2⟩ - else - default + let scheme := (b.scheme.getD ⟨"https", by decide⟩) let authority := if b.host.isSome then diff --git a/tests/lean/run/async_http_uri.lean b/tests/lean/run/async_http_uri.lean index 3091c47c3018..f8fe2f45542d 100644 --- a/tests/lean/run/async_http_uri.lean +++ b/tests/lean/run/async_http_uri.lean @@ -550,8 +550,8 @@ info: /x/y -- URI Modification Helpers -- ============================================================================ -#guard ((URI.parse! "http://example.com").withScheme "htTps" |>.scheme) == "https" -#guard ((URI.parse! "http://example.com").withScheme "ftP" |>.scheme) == "ftp" +#guard ((URI.parse! "http://example.com").withScheme! "htTps" |>.scheme) == "https" +#guard ((URI.parse! "http://example.com").withScheme! "ftP" |>.scheme) == "ftp" /-- info: http://example.com/#section1 From 0fb57a405f0cf6be1879b113f19d4ac53ae822cf Mon Sep 17 00:00:00 2001 From: Sofia Rodrigues Date: Fri, 20 Feb 2026 18:02:46 -0300 Subject: [PATCH 22/30] fix: wrong comment --- src/Std/Internal/Http/Data/URI/Basic.lean | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/src/Std/Internal/Http/Data/URI/Basic.lean b/src/Std/Internal/Http/Data/URI/Basic.lean index 004e18c35dce..8c34066240ad 100644 --- a/src/Std/Internal/Http/Data/URI/Basic.lean +++ b/src/Std/Internal/Http/Data/URI/Basic.lean @@ -663,16 +663,7 @@ def withScheme! (uri : URI) (scheme : String) : URI := else panic! s!"invalid URI scheme: {scheme.quote}" -/--/-- -Returns a new URI with the scheme replaced. --/ -def withScheme! (uri : URI) (scheme : String) : URI := - let lower := scheme.toLower - if h : IsLowerCase lower ∧ lower.toList.all isValidSchemeChar ∧ ¬lower.isEmpty then - { uri with scheme := ⟨lower, h.1, h.2.1, h.2.2⟩ } - else - panic! s!"invalid URI scheme: {scheme.quote}" -z +/-- Returns a new URI with the authority replaced. -/ def withAuthority (uri : URI) (authority : Option URI.Authority) : URI := From 21821ef062149dfc1347f12522f573757b8e1a8f Mon Sep 17 00:00:00 2001 From: Sofia Rodrigues Date: Tue, 24 Feb 2026 14:28:14 -0300 Subject: [PATCH 23/30] fix: encoding duplication --- src/Std/Internal/Http/Data/URI/Basic.lean | 4 +- src/Std/Internal/Http/Data/URI/Encoding.lean | 32 ++-- src/Std/Internal/Http/Data/URI/Parser.lean | 6 +- src/Std/Internal/Http/Internal/Char.lean | 172 +++++++++++++++++-- 4 files changed, 178 insertions(+), 36 deletions(-) diff --git a/src/Std/Internal/Http/Data/URI/Basic.lean b/src/Std/Internal/Http/Data/URI/Basic.lean index 8c34066240ad..0fb9f6d7c1e2 100644 --- a/src/Std/Internal/Http/Data/URI/Basic.lean +++ b/src/Std/Internal/Http/Data/URI/Basic.lean @@ -37,7 +37,7 @@ Checks if a character is valid after the first character of a URI scheme. Valid characters are ASCII alphanumeric, `+`, `-`, and `.`. -/ def isValidSchemeChar (c : Char) : Bool := - c.isAlphanum || c == '+' || c == '-' || c == '.' + Internal.Char.isValidSchemeChar c /-- URI scheme identifier (e.g., "http", "https", "ftp"). @@ -68,7 +68,7 @@ Checks if a character is valid for use in a domain name. Valid characters are ASCII alphanumeric, hyphens, and dots. -/ def isValidDomainNameChar (c : Char) : Bool := - c.isAlphanum || c == '-' || c == '.' + Internal.Char.isValidDomainNameChar c /-- Proposition that asserts all characters in a string are valid domain name characters. diff --git a/src/Std/Internal/Http/Data/URI/Encoding.lean b/src/Std/Internal/Http/Data/URI/Encoding.lean index 773ebdb6824b..a64c1cc2475c 100644 --- a/src/Std/Internal/Http/Data/URI/Encoding.lean +++ b/src/Std/Internal/Http/Data/URI/Encoding.lean @@ -35,7 +35,7 @@ Checks if a byte is a valid character in a percent-encoded URI component. Valid unreserved characters or the percent sign (for escape sequences). -/ def isEncodedChar (rule : UInt8 → Bool) (c : UInt8) : Bool := - isAscii c ∧ (rule c ∨ isHexDigit c ∨ c = '%'.toUInt8) + isAsciiByte c ∧ (rule c ∨ isHexDigitByte c ∨ c = '%'.toUInt8) /-- Checks if a byte is valid in a percent-encoded query string component. Extends `isEncodedChar` to also @@ -84,7 +84,7 @@ def isValidPercentEncoding (ba : ByteArray) : Bool := if h₂ : i + 2 < ba.size then let d1 := ba[i + 1]'(by omega) let d2 := ba[i + 2]'h₂ - if isHexDigit d1 && isHexDigit d2 then + if isHexDigitByte d1 && isHexDigitByte d2 then loop (i + 3) else false else false @@ -123,19 +123,19 @@ private theorem isAllowedEncodedQueryChars.push {bs : ByteArray} (h : isAllowedE isAllowedEncodedQueryChars r (bs.push c) := by simpa [isAllowedEncodedQueryChars, ByteArray.push, Array.all_push, And.intro h h₁] -private theorem isEncodedChar_isAscii (c : UInt8) (h : isEncodedChar r c) : isAscii c := by - simp [isEncodedChar, isAscii] at * +private theorem isEncodedChar_isAscii (c : UInt8) (h : isEncodedChar r c) : isAsciiByte c := by + simp [isEncodedChar, isAsciiByte] at * exact h.left -private theorem isEncodedQueryChar_isAscii (c : UInt8) (h : isEncodedQueryChar r c) : isAscii c := by - unfold isEncodedQueryChar isAscii at * +private theorem isEncodedQueryChar_isAscii (c : UInt8) (h : isEncodedQueryChar r c) : isAsciiByte c := by + unfold isEncodedQueryChar isAsciiByte at * simp at h rcases h next h => exact isEncodedChar_isAscii c h next h => subst_vars; decide -private theorem hexDigit_isHexDigit (h₀ : x < 16) : isHexDigit (hexDigit x) := by - unfold hexDigit isHexDigit +private theorem hexDigit_isHexDigit (h₀ : x < 16) : isHexDigitByte (hexDigit x) := by + unfold hexDigit isHexDigitByte have h₁ : x.toNat < 16 := h₀ split <;> simp [Char.toUInt8] @@ -161,19 +161,19 @@ private theorem hexDigit_isHexDigit (h₀ : x < 16) : isHexDigit (hexDigit x) := · simpa [UInt8.ofNat_add, UInt8.ofNat_sub (by omega : 10 ≤ x.toNat)] using UInt8.ofNat_le_iff_le h₄ (by decide : 70 < 256) |>.mpr h₅ -private theorem isHexDigit_isAscii {c : UInt8} (h : isHexDigit c) : isAscii c := by - simp [isHexDigit, isAscii, Char.toUInt8] at * +private theorem isHexDigit_isAscii {c : UInt8} (h : isHexDigitByte c) : isAsciiByte c := by + simp [isHexDigitByte, isAsciiByte, Char.toUInt8] at * rcases h with ⟨h1, h2⟩ | ⟨h1, h2⟩ · exact UInt8.lt_of_le_of_lt h2 (by decide) next h => exact UInt8.lt_of_le_of_lt h.right (by decide) · exact UInt8.lt_of_le_of_lt h2 (by decide) -private theorem isHexDigit_isEncodedChar {c : UInt8} (h : isHexDigit c) : isEncodedChar r c := by +private theorem isHexDigit_isEncodedChar {c : UInt8} (h : isHexDigitByte c) : isEncodedChar r c := by unfold isEncodedChar simp at * exact And.intro (isHexDigit_isAscii h) (Or.inr (Or.inl h)) -private theorem isHexDigit_isEncodedQueryChar {c : UInt8} (h : isHexDigit c) : isEncodedQueryChar r c := by +private theorem isHexDigit_isEncodedQueryChar {c : UInt8} (h : isHexDigitByte c) : isEncodedQueryChar r c := by unfold isEncodedQueryChar isEncodedChar simp at * exact Or.inl (And.intro (isHexDigit_isAscii h) (Or.inr (Or.inl h))) @@ -215,13 +215,13 @@ private theorem ByteArray.toList_toByteArray (ba : ByteArray) : simp [List.toByteArray_loop_eq, ByteArray.empty] decide -theorem ascii_is_valid_utf8 (ba : ByteArray) (s : ba.data.all isAscii) : ByteArray.IsValidUTF8 ba := by +theorem ascii_is_valid_utf8 (ba : ByteArray) (s : ba.data.all isAsciiByte) : ByteArray.IsValidUTF8 ba := by refine ⟨ba.data.toList.map Char.ofUInt8, ?_⟩ rw [List.utf8Encode] simp only [List.flatMap_map] have is_ascii : ∀ (x : UInt8), x ∈ ba.data.toList → x < 128 := by let is_ascii := Array.all_eq_true_iff_forall_mem.mp s - simp [isAscii] at is_ascii + simp [isAsciiByte] at is_ascii intro x hx exact is_ascii x (by simp_all) rw [autf8EncodeChar_flatMap_ascii is_ascii] @@ -294,7 +294,7 @@ Encodes a raw string into an `EncodedString` with automatic proof construction. -/ def encode (s : String) : EncodedString r := s.toUTF8.foldl (init := EncodedString.empty) fun acc c => - if h : isAscii c ∧ r c then + if h : isAsciiByte c ∧ r c then acc.push c (by simp [isEncodedChar]; exact And.intro h.left (Or.inl h.right)) else byteToHex c acc @@ -478,7 +478,7 @@ are kept as-is, spaces are encoded as '+', and all other characters are percent- -/ def encode (s : String) (r : UInt8 → Bool := isQueryChar) : EncodedQueryString r := s.toUTF8.foldl (init := EncodedQueryString.empty) fun acc c => - if h : isAscii c ∧ r c then + if h : isAsciiByte c ∧ r c then acc.push c (by simp [isEncodedQueryChar, isEncodedChar]; exact Or.inl (And.intro h.left (Or.inl h.right))) else if _ : c = ' '.toUInt8 then acc.push '+'.toUInt8 (by simp [isEncodedQueryChar]) diff --git a/src/Std/Internal/Http/Data/URI/Parser.lean b/src/Std/Internal/Http/Data/URI/Parser.lean index 7c56fdca2111..003163a57712 100644 --- a/src/Std/Internal/Http/Data/URI/Parser.lean +++ b/src/Std/Internal/Http/Data/URI/Parser.lean @@ -51,8 +51,8 @@ private def hexToByte (digit : UInt8) : UInt8 := private def parsePctEncoded : Parser UInt8 := do skipByte '%'.toUInt8 - let hi ← hexToByte <$> satisfy isHexDigit - let lo ← hexToByte <$> satisfy isHexDigit + let hi ← hexToByte <$> satisfy isHexDigitByte + let lo ← hexToByte <$> satisfy isHexDigitByte return (hi <<< 4) ||| lo -- scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) @@ -120,7 +120,7 @@ private def parseIPv6 : Parser Net.IPv6Addr := do skipByte '['.toUInt8 let result ← takeWhileUpTo1 - (fun x => x = ':'.toUInt8 ∨ isHexDigit x) + (fun x => x = ':'.toUInt8 ∨ isHexDigitByte x) 256 skipByte ']'.toUInt8 diff --git a/src/Std/Internal/Http/Internal/Char.lean b/src/Std/Internal/Http/Internal/Char.lean index f5d378a41583..fc95dfdc33ba 100644 --- a/src/Std/Internal/Http/Internal/Char.lean +++ b/src/Std/Internal/Http/Internal/Char.lean @@ -6,6 +6,7 @@ Authors: Sofia Rodrigues module prelude +import Std.Tactic.BVDecide public import Init.Data.String @[expose] @@ -35,16 +36,37 @@ def isAscii (c : Char) : Bool := Checks if a byte is a decimal digit (0-9). -/ @[inline, expose] -def isDigit (c : Char) : Bool := +def isDigitChar (c : Char) : Bool := c >= '0' ∧ c <= '9' /-- Checks if a byte is an alphabetic character (a-z or A-Z). -/ @[inline, expose] -def isAlpha (c : Char) : Bool := +def isAlphaChar (c : Char) : Bool := (c >= 'A' ∧ c <= 'Z') ∨ (c >= 'a' ∧ c <= 'z') +/-- +Checks if a byte represents an ASCII character (value < 128). +-/ +@[expose] +def isAsciiByte (c : UInt8) : Bool := + c < 128 + +/-- +Checks if a byte is a decimal digit (0-9). +-/ +@[inline, expose] +def isDigit (c : UInt8) : Bool := + c >= '0'.toUInt8 && c <= '9'.toUInt8 + +/-- +Checks if a byte is an alphabetic character (a-z or A-Z). +-/ +@[inline, expose] +def isAlpha (c : UInt8) : Bool := + (c >= 'A'.toUInt8 && c <= 'Z'.toUInt8) || (c >= 'a'.toUInt8 && c <= 'z'.toUInt8) + /-- Two character predicates are equivalent on ASCII input (`0x00`-`0x7F`). -/ @@ -59,8 +81,8 @@ tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" -/ def tchar (c : Char) : Bool := "!#$%&'*+-.^_`|~".toList.contains c - ∨ isDigit c - ∨ isAlpha c + ∨ isDigitChar c + ∨ isAlphaChar c /-- Checks if a character is a valid HTTP token character per RFC 9110 §5.6.2. @@ -235,8 +257,26 @@ def rws (c : Char) : Bool := Checks if a byte is a hexadecimal digit (0-9, a-f, or A-F). Note: This accepts both lowercase and uppercase hex digits. -/ +def isHexDigitSpec (c : Char) : Bool := + (c ≥ '0' && c ≤ '9') || + (c ≥ 'a' && c ≤ 'f') || + (c ≥ 'A' && c ≤ 'F') + +/-- +Checks if a byte is a hexadecimal digit (0-9, a-f, or A-F) using a precomputed bitmask. +-/ +@[expose] +def isHexDigit (c : Char) : Bool := + Nat.testBit 0x0000007e0000007e03ff000000000000 c.toNat + +theorem isHexDigit_eq_isHexDigitSpec_on_ascii : EqOnAscii isHexDigit isHexDigitSpec := by + decide + +/-- +Byte-level wrapper for parsers that operate on UTF-8 bytes. +-/ @[expose] -def isHexDigit (c : UInt8) : Bool := +def isHexDigitByte (c : UInt8) : Bool := (c ≥ '0'.toUInt8 && c ≤ '9'.toUInt8) || (c ≥ 'a'.toUInt8 && c ≤ 'f'.toUInt8) || (c ≥ 'A'.toUInt8 && c ≤ 'F'.toUInt8) @@ -244,31 +284,90 @@ def isHexDigit (c : UInt8) : Bool := /-- Checks if a byte is an alphanumeric digit (0-9, a-z, or A-Z). -/ -@[expose] -def isAlphaNum (c : UInt8) : Bool := +def isAlphaNumSpec (c : UInt8) : Bool := (c ≥ '0'.toUInt8 && c ≤ '9'.toUInt8) || (c ≥ 'a'.toUInt8 && c ≤ 'z'.toUInt8) || (c ≥ 'A'.toUInt8 && c ≤ 'Z'.toUInt8) + + + + /-- +Checks if a byte is an alphanumeric digit (0-9, a-z, or A-Z) using a precomputed bitmask. +-/ +@[expose] +def isAlphaNum (c : UInt8) : Bool := + Nat.testBit 0x7fffffe07fffffe03ff000000000000 c.toNat + +theorem isAlphaNum_eq_isAlphaNumSpec_on_ascii : + ∀ i : Fin 128, isAlphaNum (UInt8.ofNat i.1) = isAlphaNumSpec (UInt8.ofNat i.1) := by + decide + + +/-- +Checks if a character is an ASCII alphanumeric character. +-/ +@[inline, expose] +def isAsciiAlphaNumChar (c : Char) : Bool := + isAscii c && isAlphaNum (UInt8.ofNat c.toNat) + +/-- +Checks if a character is valid after the first character of a URI scheme. +Valid characters are ASCII alphanumeric, `+`, `-`, and `.`. +-/ +@[expose] +def isValidSchemeChar (c : Char) : Bool := + isAsciiAlphaNumChar c || c == '+' || c == '-' || c == '.' + +/-- +Checks if a character is valid for use in a domain name. +Valid characters are ASCII alphanumeric, hyphens, and dots. +-/ +@[expose] +def isValidDomainNameChar (c : Char) : Bool := + isAsciiAlphaNumChar c || c == '-' || c == '.' + /-- Checks if a byte is an unreserved character according to RFC 3986. Unreserved characters are: alphanumeric, hyphen, period, underscore, and tilde. -/ +def isUnreservedSpec (c : UInt8) : Bool := + isAlphaNumSpec c || + (c = '-'.toUInt8 || c = '.'.toUInt8 || c = '_'.toUInt8 || c = '~'.toUInt8) + + /-- +Checks if a byte is an unreserved character according to RFC 3986 using a precomputed bitmask. +-/ @[expose] def isUnreserved (c : UInt8) : Bool := - isAlphaNum c || - (c = '-'.toUInt8 || c = '.'.toUInt8 || c = '_'.toUInt8 || c = '~'.toUInt8) + Nat.testBit 0x47fffffe87fffffe03ff600000000000 c.toNat + +theorem isUnreserved_eq_isUnreservedSpec_on_ascii : + ∀ i : Fin 128, isUnreserved (UInt8.ofNat i.1) = isUnreservedSpec (UInt8.ofNat i.1) := by + decide + /-- Checks if a byte is a sub-delimiter character according to RFC 3986. Sub-delimiters are: `!`, `$`, `&`, `'`, `(`, `)`, `*`, `+`, `,`, `;`, `=`. -/ -@[expose] -def isSubDelims (c : UInt8) : Bool := +def isSubDelimsSpec (c : UInt8) : Bool := c = '!'.toUInt8 || c = '$'.toUInt8 || c = '&'.toUInt8 || c = '\''.toUInt8 || c = '('.toUInt8 || c = ')'.toUInt8 || c = '*'.toUInt8 || c = '+'.toUInt8 || c = ','.toUInt8 || c = ';'.toUInt8 || c = '='.toUInt8 + /-- +Checks if a byte is a sub-delimiter character according to RFC 3986 using a precomputed bitmask. +-/ +@[expose] +def isSubDelims (c : UInt8) : Bool := + Nat.testBit 0x28001fd200000000 c.toNat + +theorem isSubDelims_eq_isSubDelimsSpec_on_ascii : + ∀ i : Fin 128, isSubDelims (UInt8.ofNat i.1) = isSubDelimsSpec (UInt8.ofNat i.1) := by + decide + + /-- Checks if a byte is a valid path character (`pchar`) according to RFC 3986. `pchar = unreserved / pct-encoded / sub-delims / ":" / "@"` @@ -276,32 +375,75 @@ Checks if a byte is a valid path character (`pchar`) according to RFC 3986. Note: The percent-encoding (`pct-encoded`) is handled separately by `isEncodedChar`, so this predicate only covers the non-percent characters. -/ +def isPCharSpec (c : UInt8) : Bool := + isUnreservedSpec c || isSubDelimsSpec c || c = ':'.toUInt8 || c = '@'.toUInt8 + + /-- +Checks if a byte is a valid path character (`pchar`) according to RFC 3986 using a precomputed bitmask. +-/ @[expose] def isPChar (c : UInt8) : Bool := - isUnreserved c || isSubDelims c || c = ':'.toUInt8 || c = '@'.toUInt8 + Nat.testBit 0x47fffffe87ffffff2fff7fd200000000 c.toNat + +theorem isPChar_eq_isPCharSpec_on_ascii : + ∀ i : Fin 128, isPChar (UInt8.ofNat i.1) = isPCharSpec (UInt8.ofNat i.1) := by + decide + /-- Checks if a byte is a valid character in a URI query component according to RFC 3986. `query = *( pchar / "/" / "?" )` -/ +def isQueryCharSpec (c : UInt8) : Bool := + isPCharSpec c || c = '/'.toUInt8 || c = '?'.toUInt8 + + /-- +Checks if a byte is a valid character in a URI query component according to RFC 3986 using a precomputed bitmask. +-/ @[expose] def isQueryChar (c : UInt8) : Bool := - isPChar c || c = '/'.toUInt8 || c = '?'.toUInt8 + Nat.testBit 0x47fffffe87ffffffafffffd200000000 c.toNat + +theorem isQueryChar_eq_isQueryCharSpec_on_ascii : + ∀ i : Fin 128, isQueryChar (UInt8.ofNat i.1) = isQueryCharSpec (UInt8.ofNat i.1) := by + decide + /-- Checks if a byte is a valid character in a URI fragment component according to RFC 3986. `fragment = *( pchar / "/" / "?" )` -/ +def isFragmentCharSpec (c : UInt8) : Bool := + isPCharSpec c || c = '/'.toUInt8 || c = '?'.toUInt8 + + /-- +Checks if a byte is a valid character in a URI fragment component according to RFC 3986 using a precomputed bitmask. +-/ @[expose] def isFragmentChar (c : UInt8) : Bool := - isPChar c || c = '/'.toUInt8 || c = '?'.toUInt8 + Nat.testBit 0x47fffffe87ffffffafffffd200000000 c.toNat + +theorem isFragmentChar_eq_isFragmentCharSpec_on_ascii : + ∀ i : Fin 128, isFragmentChar (UInt8.ofNat i.1) = isFragmentCharSpec (UInt8.ofNat i.1) := by + decide /-- Checks if a byte is a valid character in a URI userinfo component according to RFC 3986. `userinfo = *( unreserved / pct-encoded / sub-delims / ":" )` -/ +def isUserInfoCharSpec (c : UInt8) : Bool := + isUnreservedSpec c || isSubDelimsSpec c || c = ':'.toUInt8 + + /-- +Checks if a byte is a valid character in a URI userinfo component according to RFC 3986 using a precomputed bitmask. +-/ @[expose] def isUserInfoChar (c : UInt8) : Bool := - isUnreserved c || isSubDelims c || c = ':'.toUInt8 + Nat.testBit 0x47fffffe87fffffe2fff7fd200000000 c.toNat + +theorem isUserInfoChar_eq_isUserInfoCharSpec_on_ascii : + ∀ i : Fin 128, isUserInfoChar (UInt8.ofNat i.1) = isUserInfoCharSpec (UInt8.ofNat i.1) := by + decide + end Std.Http.Internal.Char From 4de3e40349ef81d215886a578233c57c74247b44 Mon Sep 17 00:00:00 2001 From: Sofia Rodrigues Date: Wed, 25 Feb 2026 22:00:11 -0300 Subject: [PATCH 24/30] fix: builder --- src/Std/Internal/Http/Data/Request.lean | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Std/Internal/Http/Data/Request.lean b/src/Std/Internal/Http/Data/Request.lean index e37952721c60..8e5b8baeb001 100644 --- a/src/Std/Internal/Http/Data/Request.lean +++ b/src/Std/Internal/Http/Data/Request.lean @@ -86,7 +86,7 @@ structure Request.Builder where /-- The head of the request. -/ - head : Head := { method := .get, version := .v11, uri := "*" } + head : Head := { method := .get, version := .v11, uri := .asteriskForm } /-- Optional dynamic metadata attached to the request. From 819d4c6c1f0604608f08d3bef174c22c8820137c Mon Sep 17 00:00:00 2001 From: Sofia Rodrigues Date: Wed, 25 Feb 2026 22:13:19 -0300 Subject: [PATCH 25/30] fix: uri comments --- src/Std/Internal/Http/Data/URI.lean | 4 + src/Std/Internal/Http/Data/URI/Basic.lean | 88 ++++++++++++++++---- src/Std/Internal/Http/Data/URI/Encoding.lean | 13 +-- src/Std/Internal/Http/Data/URI/Parser.lean | 39 ++++----- 4 files changed, 99 insertions(+), 45 deletions(-) diff --git a/src/Std/Internal/Http/Data/URI.lean b/src/Std/Internal/Http/Data/URI.lean index 939a833107e5..bf38529f52f0 100644 --- a/src/Std/Internal/Http/Data/URI.lean +++ b/src/Std/Internal/Http/Data/URI.lean @@ -17,6 +17,10 @@ public section This module defines the `URI` and `RequestTarget` types that represent and manipulate components of URIs as defined by RFC 3986. It provides parsing, rendering, and normalization utilities for working with URIs and request targets in HTTP messages. + +References: +* https://www.rfc-editor.org/rfc/rfc3986.html +* https://www.rfc-editor.org/rfc/rfc9112.html#section-3.3 -/ namespace Std.Http.RequestTarget diff --git a/src/Std/Internal/Http/Data/URI/Basic.lean b/src/Std/Internal/Http/Data/URI/Basic.lean index 0fb9f6d7c1e2..6cc6491e1b64 100644 --- a/src/Std/Internal/Http/Data/URI/Basic.lean +++ b/src/Std/Internal/Http/Data/URI/Basic.lean @@ -21,6 +21,8 @@ paths, queries, fragments, and request targets. All text components use the encoding types from `Std.Http.URI.Encoding` to ensure proper percent-encoding is maintained throughout. + +Reference: https://www.rfc-editor.org/rfc/rfc3986.html -/ namespace Std.Http @@ -31,7 +33,6 @@ open Internal Char namespace URI - /-- Checks if a character is valid after the first character of a URI scheme. Valid characters are ASCII alphanumeric, `+`, `-`, and `.`. @@ -39,10 +40,23 @@ Valid characters are ASCII alphanumeric, `+`, `-`, and `.`. def isValidSchemeChar (c : Char) : Bool := Internal.Char.isValidSchemeChar c +/-- +Proposition that `s` is a valid URI scheme per RFC 3986: +`scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )`. + +The scheme value stored in this module is normalized to lowercase. + +Reference: https://www.rfc-editor.org/rfc/rfc3986.html#section-3.1 +-/ +abbrev IsValidScheme (s : String) : Prop := + IsLowerCase s ∧ + s.toList.all isValidSchemeChar ∧ + (s.toList.head?.map Internal.Char.isAlphaChar |>.getD false) + /-- URI scheme identifier (e.g., "http", "https", "ftp"). -/ -abbrev Scheme := { s : String // IsLowerCase s ∧ s.toList.all isValidSchemeChar ∧ ¬s.isEmpty } +abbrev Scheme := { s : String // IsValidScheme s } instance : Inhabited Scheme where default := ⟨"a", ⟨by decide, by decide, by decide⟩⟩ @@ -50,6 +64,8 @@ instance : Inhabited Scheme where /-- User information component containing the username and optional password. Both fields store decoded (unescaped) values. + +Reference: https://www.rfc-editor.org/rfc/rfc3986.html#section-3.2.1 -/ structure UserInfo where /-- @@ -71,20 +87,49 @@ def isValidDomainNameChar (c : Char) : Bool := Internal.Char.isValidDomainNameChar c /-- -Proposition that asserts all characters in a string are valid domain name characters. +Checks whether a single domain label is valid. +A label must be non-empty, contain only ASCII alphanumeric characters and `-`, +cannot start or end with `-`, and must be at most 63 characters. + +References: +* https://www.rfc-editor.org/rfc/rfc1034.html#section-3.5 +* https://www.rfc-editor.org/rfc/rfc1123.html#section-2.1 +-/ +def isValidDomainLabel (s : String) : Bool := + let chars := s.toList + decide (chars.length ≤ 63) && + chars.all (fun c => Internal.Char.isAsciiAlphaNumChar c ∨ c = '-') && + (chars.head?.map Internal.Char.isAsciiAlphaNumChar |>.getD false) && + (chars.getLast?.map Internal.Char.isAsciiAlphaNumChar |>.getD false) + +/-- +Proposition that asserts a domain label is valid. +-/ +abbrev IsValidDomainLabel (s : String) : Prop := + isValidDomainLabel s + +/-- +Proposition that asserts `s` is a valid dot-separated domain name. +Each label must satisfy `IsValidDomainLabel`. -/ abbrev IsValidDomainName (s : String) : Prop := - s.toList.all isValidDomainNameChar + let labels := s.splitOn "." + ¬labels.isEmpty ∧ labels.all isValidDomainLabel /-- A domain name represented as a validated, lowercase-normalized string. Only ASCII alphanumeric characters, hyphens, and dots are allowed. +Each label cannot start or end with `-` and is limited to 63 characters. Internationalized domain names must be converted to punycode before use. + +Reference: https://www.rfc-editor.org/rfc/rfc3986.html#section-3.2.2 -/ abbrev DomainName := { s : String // IsLowerCase s ∧ IsValidDomainName s ∧ ¬s.isEmpty } /-- Host component of a URI, supporting domain names and IP addresses. + +Reference: https://www.rfc-editor.org/rfc/rfc3986.html#section-3.2.2 -/ inductive Host /-- @@ -124,6 +169,8 @@ instance : ToString Host where /-- TCP port number. + +Reference: https://www.rfc-editor.org/rfc/rfc3986.html#section-3.2.3 -/ abbrev Port := UInt16 @@ -161,12 +208,11 @@ instance : ToString Authority where | some p => s!":{p}" s!"{userPart}{hostPart}{portPart}" -namespace Authority -end Authority - /-- Hierarchical path component of a URI. Each segment is stored as an `EncodedSegment` to maintain proper percent-encoding. + +Reference: https://www.rfc-editor.org/rfc/rfc3986.html#section-3.3 -/ structure Path where /-- @@ -254,6 +300,8 @@ end Path Query string represented as an array of key-value pairs. Both keys and values are stored as `EncodedQueryParam` for proper application/x-www-form-urlencoded encoding. Values are optional to support parameters without values (e.g., "?flag"). Order is preserved based on insertion order. + +Reference: https://www.rfc-editor.org/rfc/rfc3986.html#section-3.4 -/ @[expose] def Query := Array (EncodedQueryParam × Option EncodedQueryParam) @@ -352,7 +400,6 @@ Removes all occurrences of a query parameter by key name. -/ def erase (query : Query) (key : String) : Query := let encodedKey : EncodedQueryParam := EncodedQueryParam.encode key - -- Filter out matching keys query.filter (fun x => x.fst.toByteArray ≠ encodedKey.toByteArray) /-- @@ -504,11 +551,14 @@ def empty : Builder := {} /-- Attempts to set the URI scheme (e.g., "http", "https"). Returns `none` if the scheme is not a valid RFC 3986 scheme. -A scheme must start with an ASCII letter followed by alphanumeric, `+`, `-`, or `.` characters. +The stored scheme is normalized to lowercase. -/ def setScheme? (b : Builder) (scheme : String) : Option Builder := - if h : IsLowerCase scheme ∧ scheme.toList.all isValidSchemeChar ∧ ¬scheme.isEmpty then some { b with scheme := some ⟨scheme, h⟩ } - else none + let lower := scheme.toLower + if h : IsValidScheme lower then + some { b with scheme := some ⟨lower, h⟩ } + else + none /-- Sets the URI scheme (e.g., "http", "https"). Panics if the scheme is invalid. @@ -534,6 +584,7 @@ def setUserInfo (b : Builder) (username : String) (password : Option String := n Sets the host as a domain name, returning `none` if the name contains invalid characters. The domain name will be automatically lowercased. Only ASCII alphanumeric characters, hyphens, and dots are allowed. +Each label cannot start or end with `-` and is limited to 63 characters. Internationalized domain names must be converted to punycode before use. -/ def setHost? (b : Builder) (name : String) : Option Builder := @@ -549,6 +600,7 @@ def setHost? (b : Builder) (name : String) : Option Builder := Sets the host as a domain name, panicking if the name contains invalid characters. The domain name will be automatically lowercased. Only ASCII alphanumeric characters, hyphens, and dots are allowed. +Each label cannot start or end with `-` and is limited to 63 characters. Internationalized domain names must be converted to punycode before use. -/ def setHost! (b : Builder) (name : String) : Builder := @@ -658,8 +710,8 @@ Returns a new URI with the scheme replaced. -/ def withScheme! (uri : URI) (scheme : String) : URI := let lower := scheme.toLower - if h : IsLowerCase lower ∧ lower.toList.all isValidSchemeChar ∧ ¬lower.isEmpty then - { uri with scheme := ⟨lower, h.1, h.2.1, h.2.2⟩ } + if h : IsValidScheme lower then + { uri with scheme := ⟨lower, h⟩ } else panic! s!"invalid URI scheme: {scheme.quote}" @@ -688,7 +740,11 @@ def withFragment (uri : URI) (fragment : Option String) : URI := { uri with fragment } /-- -Normalizes a URI according to RFC 3986 Section 6. +Partially normalizes a URI by removing dot-segments from the path (`.` and `..`) +according to RFC 3986 Section 5.2.4. + +This does not apply the full normalization set from RFC 3986 Section 6 +(e.g. case normalization, percent-encoding normalization, or default-port normalization). -/ def normalize (uri : URI) : URI := { uri with @@ -700,9 +756,9 @@ def normalize (uri : URI) : URI := end URI /-- -HTTP request target forms as defined in RFC 7230 Section 5.3. +HTTP request target forms as defined in RFC 9112 Section 3.3. -Reference: https://www.rfc-editor.org/rfc/rfc7230.html#section-5.3 +Reference: https://www.rfc-editor.org/rfc/rfc9112.html#section-3.3 -/ inductive RequestTarget where /-- diff --git a/src/Std/Internal/Http/Data/URI/Encoding.lean b/src/Std/Internal/Http/Data/URI/Encoding.lean index a64c1cc2475c..a921468220c4 100644 --- a/src/Std/Internal/Http/Data/URI/Encoding.lean +++ b/src/Std/Internal/Http/Data/URI/Encoding.lean @@ -17,19 +17,22 @@ public import Std.Internal.Http.Internal.Char public section -namespace Std.Http.URI -open Internal Char - -set_option linter.all true - /-! # URI Encoding This module provides utilities for percent-encoding URI components according to RFC 3986. It includes character validation, encoding/decoding functions, and types that maintain encoding invariants through Lean's dependent type system. + +Reference: https://www.rfc-editor.org/rfc/rfc3986.html#section-2.1 -/ +namespace Std.Http.URI + +set_option linter.all true + +open Internal Char + /-- Checks if a byte is a valid character in a percent-encoded URI component. Valid characters are unreserved characters or the percent sign (for escape sequences). diff --git a/src/Std/Internal/Http/Data/URI/Parser.lean b/src/Std/Internal/Http/Data/URI/Parser.lean index 003163a57712..70ad963dab96 100644 --- a/src/Std/Internal/Http/Data/URI/Parser.lean +++ b/src/Std/Internal/Http/Data/URI/Parser.lean @@ -20,43 +20,32 @@ public section This module provides parsers for URIs and request targets according to RFC 3986. It handles parsing of schemes, authorities, paths, queries, and fragments. + +References: +* https://www.rfc-editor.org/rfc/rfc3986.html +* https://www.rfc-editor.org/rfc/rfc9112.html#section-3.3 -/ namespace Std.Http.URI.Parser -open Internal Char - set_option linter.all true +open Internal Char open Std Internal Parsec ByteArray @[inline] private def tryOpt (p : Parser α) : Parser (Option α) := optional (attempt p) -@[inline] -private def ofExcept (p : Except String α) : Parser α := - match p with - | .ok res => pure res - | .error err => fail err - @[inline] private def peekIs (p : UInt8 → Bool) : Parser Bool := do return (← peekWhen? p).isSome -private def hexToByte (digit : UInt8) : UInt8 := - if digit <= '9'.toUInt8 then digit - '0'.toUInt8 - else if digit <= 'F'.toUInt8 then digit - 'A'.toUInt8 + 10 - else digit - 'a'.toUInt8 + 10 - -private def parsePctEncoded : Parser UInt8 := do - skipByte '%'.toUInt8 - let hi ← hexToByte <$> satisfy isHexDigitByte - let lo ← hexToByte <$> satisfy isHexDigitByte - return (hi <<< 4) ||| lo - -- scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) private def parseScheme (config : URI.Config) : Parser URI.Scheme := do + if config.maxSchemeLength = 0 then + fail s!"scheme too long (limit: {config.maxSchemeLength})" + let first ← takeWhileUpTo1 isAlpha 1 let rest ← takeWhileUpTo (fun c => @@ -66,8 +55,8 @@ private def parseScheme (config : URI.Config) : Parser URI.Scheme := do let schemeBytes := first.toByteArray ++ rest.toByteArray let str := String.fromUTF8! schemeBytes |>.toLower - if h: str.toList.all isValidSchemeChar ∧ ¬str.isEmpty then - return ⟨str, ⟨.isLowerCase_toLower, h⟩⟩ + if h : URI.IsValidScheme str then + return ⟨str, h⟩ else fail "invalid scheme" @@ -115,12 +104,14 @@ private def parseUserInfo (config : URI.Config) : Parser URI.UserInfo := do return ⟨userName, userPass⟩ --- IP-literal = "[" ( IPv6address / IPvFuture ) "]" +-- Parses bracketed IPv6 literals. +-- Note: RFC 3986 also allows IPvFuture inside `IP-literal`; this parser +-- currently rejects IPvFuture. private def parseIPv6 : Parser Net.IPv6Addr := do skipByte '['.toUInt8 let result ← takeWhileUpTo1 - (fun x => x = ':'.toUInt8 ∨ isHexDigitByte x) + (fun x => x = ':'.toUInt8 ∨ x = '.'.toUInt8 ∨ isHexDigitByte x) 256 skipByte ']'.toUInt8 @@ -154,7 +145,7 @@ private def parseHost (config : URI.Config) : Parser URI.Host := do if let some ipv4 ← tryOpt parseIPv4 then return .ipv4 ipv4 - -- It needs to be a legal DNS label, so it differs from reg-name. + -- We intentionally parse DNS names here (not full RFC 3986 reg-name). let some str := String.fromUTF8? (← takeWhileUpTo1 (fun x => isAlphaNum x ∨ x = '-'.toUInt8 ∨ x = '.'.toUInt8) config.maxHostLength).toByteArray From 73cf41d7e567646f816f8ec9f0d24c55c67cb4a0 Mon Sep 17 00:00:00 2001 From: Sofia Rodrigues Date: Wed, 25 Feb 2026 22:30:43 -0300 Subject: [PATCH 26/30] fix: comments --- src/Std/Internal/Http/Data/URI/Basic.lean | 71 ++++++++++++++++------ src/Std/Internal/Http/Data/URI/Parser.lean | 21 ++++++- 2 files changed, 70 insertions(+), 22 deletions(-) diff --git a/src/Std/Internal/Http/Data/URI/Basic.lean b/src/Std/Internal/Http/Data/URI/Basic.lean index 6cc6491e1b64..6acf0a93768f 100644 --- a/src/Std/Internal/Http/Data/URI/Basic.lean +++ b/src/Std/Internal/Http/Data/URI/Basic.lean @@ -16,13 +16,18 @@ public section /-! # URI Structure -This module defines the complete URI structure following RFC 3986, including schemes, authorities, -paths, queries, fragments, and request targets. +This module defines an HTTP-oriented URI structure aligned with RFC 3986 and RFC 9110, including +schemes, authorities, paths, queries, fragments, and request targets. + +Host handling is intentionally strict: this module accepts IPv4, bracketed IPv6, and DNS-style +domain names (LDH labels). RFC 3986 `reg-name` forms that are not DNS-compatible are rejected. All text components use the encoding types from `Std.Http.URI.Encoding` to ensure proper percent-encoding is maintained throughout. -Reference: https://www.rfc-editor.org/rfc/rfc3986.html +References: +* https://www.rfc-editor.org/rfc/rfc3986.html +* https://www.rfc-editor.org/rfc/rfc9110.html#name-uri-references -/ namespace Std.Http @@ -61,6 +66,29 @@ abbrev Scheme := { s : String // IsValidScheme s } instance : Inhabited Scheme where default := ⟨"a", ⟨by decide, by decide, by decide⟩⟩ +namespace Scheme + +/-- +Attempts to create a `Scheme` from a string, normalizing to lowercase. +Returns `none` if the scheme is invalid per RFC 3986 Section 3.1. +-/ +def ofString? (s : String) : Option Scheme := + let lower := s.toLower + if h : IsValidScheme lower then + some ⟨lower, h⟩ + else + none + +/-- +Creates a `Scheme` from a string, panicking if invalid. +-/ +def ofString! (s : String) : Scheme := + match ofString? s with + | some scheme => scheme + | none => panic! s!"invalid URI scheme: {s.quote}" + +end Scheme + /-- User information component containing the username and optional password. Both fields store decoded (unescaped) values. @@ -126,6 +154,23 @@ Reference: https://www.rfc-editor.org/rfc/rfc3986.html#section-3.2.2 -/ abbrev DomainName := { s : String // IsLowerCase s ∧ IsValidDomainName s ∧ ¬s.isEmpty } +namespace DomainName + +/-- +Attempts to create a normalized domain name from a string. +Returns `none` if the name is empty or any label violates DNS label constraints. +-/ +def ofString? (s : String) : Option DomainName := + let lower := s.toLower + if h₁ : lower.isEmpty then + none + else if h₂ : IsValidDomainName lower then + some ⟨lower, IsLowerCase.isLowerCase_toLower, h₂, h₁⟩ + else + none + +end DomainName + /-- Host component of a URI, supporting domain names and IP addresses. @@ -554,11 +599,7 @@ Returns `none` if the scheme is not a valid RFC 3986 scheme. The stored scheme is normalized to lowercase. -/ def setScheme? (b : Builder) (scheme : String) : Option Builder := - let lower := scheme.toLower - if h : IsValidScheme lower then - some { b with scheme := some ⟨lower, h⟩ } - else - none + URI.Scheme.ofString? scheme |>.map (fun scheme => { b with scheme := some scheme }) /-- Sets the URI scheme (e.g., "http", "https"). Panics if the scheme is invalid. @@ -588,13 +629,7 @@ Each label cannot start or end with `-` and is limited to 63 characters. Internationalized domain names must be converted to punycode before use. -/ def setHost? (b : Builder) (name : String) : Option Builder := - let lower := name.toLower - if h₁: lower.isEmpty then - none - else if h : IsValidDomainName lower then - some { b with host := some (Host.name ⟨lower, IsLowerCase.isLowerCase_toLower, h, h₁⟩) } - else - none + URI.DomainName.ofString? name |>.map (fun name => { b with host := some (Host.name name) }) /-- Sets the host as a domain name, panicking if the name contains invalid characters. @@ -709,11 +744,7 @@ namespace URI Returns a new URI with the scheme replaced. -/ def withScheme! (uri : URI) (scheme : String) : URI := - let lower := scheme.toLower - if h : IsValidScheme lower then - { uri with scheme := ⟨lower, h⟩ } - else - panic! s!"invalid URI scheme: {scheme.quote}" + { uri with scheme := URI.Scheme.ofString! scheme } /-- Returns a new URI with the authority replaced. diff --git a/src/Std/Internal/Http/Data/URI/Parser.lean b/src/Std/Internal/Http/Data/URI/Parser.lean index 70ad963dab96..bfcf7472e279 100644 --- a/src/Std/Internal/Http/Data/URI/Parser.lean +++ b/src/Std/Internal/Http/Data/URI/Parser.lean @@ -18,11 +18,16 @@ public section /-! # URI Parser -This module provides parsers for URIs and request targets according to RFC 3986. +This module provides parsers for HTTP request targets and HTTP-oriented URIs aligned with RFC 3986. It handles parsing of schemes, authorities, paths, queries, and fragments. +Notable intentional constraints: +* hosts are limited to IPv4, bracketed IPv6, and DNS-style domain names +* IPvFuture (`v...`) inside `IP-literal` is currently rejected + References: * https://www.rfc-editor.org/rfc/rfc3986.html +* https://www.rfc-editor.org/rfc/rfc9110.html#name-uri-references * https://www.rfc-editor.org/rfc/rfc9112.html#section-3.3 -/ @@ -194,6 +199,7 @@ path-empty = 0 Parses a URI path with combined parsing and validation. -/ def parsePath (config : URI.Config) (forceAbsolute : Bool) (allowEmpty : Bool) : Parser URI.Path := do + let isPathDelimiter : UInt8 → Bool := fun c => c = '?'.toUInt8 ∨ c = '#'.toUInt8 let mut isAbsolute := false let mut segments : Array _ := #[] let mut totalLength := 0 @@ -219,6 +225,13 @@ def parsePath (config : URI.Config) (forceAbsolute : Bool) (allowEmpty : Bool) : -- Parse segments while (← peek?).isSome do + let some next := (← peek?) | break + if isPathDelimiter next then + break + + if ¬(next = '/'.toUInt8 ∨ isPChar next ∨ next = '%'.toUInt8) then + break + if segments.size >= config.maxPathSegments then fail s!"too many path segments (limit: {config.maxPathSegments})" @@ -238,7 +251,8 @@ def parsePath (config : URI.Config) (forceAbsolute : Bool) (allowEmpty : Bool) : fail s!"path too long (limit: {config.maxTotalPathLength} bytes)" skip -- If path ends with '/', add empty segment - if (← peek?).isNone then + let next ← peek? + if next.isNone || next.any isPathDelimiter then segments := segments.push (URI.EncodedString.empty) else break @@ -253,6 +267,9 @@ private def parseQuery (config : URI.Config) : Parser URI.Query := do let some queryStr := String.fromUTF8? queryBytes.toByteArray | fail "invalid query string" + if queryStr.isEmpty then + return URI.Query.empty + let rawPairs := queryStr.splitOn "&" if rawPairs.length > config.maxQueryParams then From 5cb510cdf734298f286b5fe7a22963b537c37b5c Mon Sep 17 00:00:00 2001 From: Sofia Rodrigues Date: Thu, 26 Feb 2026 01:01:24 -0300 Subject: [PATCH 27/30] fix: precedence --- src/Std/Internal/Http/Data/URI/Basic.lean | 130 +++++++++++++++------ src/Std/Internal/Http/Data/URI/Parser.lean | 51 +++++--- src/Std/Internal/Http/Internal/Char.lean | 28 +++-- tests/lean/run/async_http_uri.lean | 60 +++++++++- 4 files changed, 196 insertions(+), 73 deletions(-) diff --git a/src/Std/Internal/Http/Data/URI/Basic.lean b/src/Std/Internal/Http/Data/URI/Basic.lean index 6acf0a93768f..fe685c1664e9 100644 --- a/src/Std/Internal/Http/Data/URI/Basic.lean +++ b/src/Std/Internal/Http/Data/URI/Basic.lean @@ -90,23 +90,56 @@ def ofString! (s : String) : Scheme := end Scheme /-- -User information component containing the username and optional password. Both fields store decoded -(unescaped) values. +User information component containing an encoded username and optional encoded password. + +The stored strings use URI userinfo percent-encoding so parsed URIs can be rendered without +losing percent-encoding choices (for example, `%3A` versus `:`). + +Note: embedding passwords in URIs is deprecated per RFC 9110 Section 4.2.4. Avoid using the +password field in new code, and never include it in logs or error messages. Reference: https://www.rfc-editor.org/rfc/rfc3986.html#section-3.2.1 -/ structure UserInfo where /-- - The username (decoded). + The encoded username. -/ - username : String + username : EncodedUserInfo /-- - The optional password (decoded). + The optional encoded password. -/ - password : Option String + password : Option EncodedUserInfo deriving Inhabited, Repr +namespace UserInfo + +/-- +Builds a `UserInfo` value from raw strings by applying userinfo percent-encoding. +-/ +@[inline] +def ofStrings (username : String) (password : Option String := none) : UserInfo := + { + username := EncodedUserInfo.encode username + password := EncodedUserInfo.encode <$> password + } + +/-- +Returns the decoded username, or `none` if decoding fails UTF-8 validation. +-/ +@[inline] +def username? (ui : UserInfo) : Option String := + ui.username.decode + +/-- +Returns the decoded password when present, or `none` if absent or decoding fails UTF-8 validation. +-/ +@[inline] +def password? (ui : UserInfo) : Option String := + ui.password.bind EncodedUserInfo.decode + +end UserInfo + /-- Checks if a character is valid for use in a domain name. Valid characters are ASCII alphanumeric, hyphens, and dots. @@ -138,11 +171,11 @@ abbrev IsValidDomainLabel (s : String) : Prop := /-- Proposition that asserts `s` is a valid dot-separated domain name. -Each label must satisfy `IsValidDomainLabel`. +Each label must satisfy `IsValidDomainLabel`, and the full name must be at most 253 characters. -/ abbrev IsValidDomainName (s : String) : Prop := let labels := s.splitOn "." - ¬labels.isEmpty ∧ labels.all isValidDomainLabel + ¬labels.isEmpty ∧ labels.all isValidDomainLabel ∧ s.toList.length ≤ 253 /-- A domain name represented as a validated, lowercase-normalized string. @@ -158,14 +191,18 @@ namespace DomainName /-- Attempts to create a normalized domain name from a string. -Returns `none` if the name is empty or any label violates DNS label constraints. +Returns `none` if the name is empty, longer than 253 characters, or any label violates DNS label +constraints. -/ def ofString? (s : String) : Option DomainName := let lower := s.toLower if h₁ : lower.isEmpty then none - else if h₂ : IsValidDomainName lower then - some ⟨lower, IsLowerCase.isLowerCase_toLower, h₂, h₁⟩ + else if lower.toList.length ≤ 253 then + if h₃ : IsValidDomainName lower then + some ⟨lower, IsLowerCase.isLowerCase_toLower, h₃, h₁⟩ + else + none else none @@ -245,8 +282,8 @@ instance : ToString Authority where toString auth := let userPart := match auth.userInfo with | none => "" - | some ⟨name, some pass⟩ => s!"{toString (EncodedString.encode name (r := isUnreserved))}:{toString (EncodedString.encode pass (r := isUnreserved))}@" - | some ⟨name, none⟩ => s!"{toString (EncodedString.encode name (r := isUnreserved))}@" + | some ⟨name, some pass⟩ => s!"{name}:{pass}@" + | some ⟨name, none⟩ => s!"{name}@" let hostPart := toString auth.host let portPart := match auth.port with | none => "" @@ -392,21 +429,33 @@ def formatQueryParam (key : EncodedQueryParam) (value : Option EncodedQueryParam Finds the first value of a query parameter by key name. Returns `none` if the key is not found. The value remains encoded as `EncodedQueryParam`. -/ -def find? (query : Query) (key : String) : Option (Option EncodedQueryParam) := - let encodedKey : EncodedQueryParam := EncodedQueryParam.encode key - let matchingKey := Array.find? (fun x => x.fst.toByteArray = encodedKey.toByteArray) query +def findEncoded? (query : Query) (key : EncodedQueryParam) : Option (Option EncodedQueryParam) := + let matchingKey := Array.find? (fun x => x.fst.toByteArray = key.toByteArray) query matchingKey.map (fun x => x.snd) +/-- +Finds the first value of a query parameter by raw key string. The key is percent-encoded before +matching. This avoids aliasing between raw and pre-encoded spellings. +-/ +def find? (query : Query) (key : String) : Option (Option EncodedQueryParam) := + query.findEncoded? (EncodedQueryParam.encode key) + /-- Finds all values of a query parameter by key name. Returns an empty array if the key is not found. The values remain encoded as `EncodedQueryParam`. -/ -def findAll (query : Query) (key : String) : Array (Option EncodedQueryParam) := - let encodedKey : EncodedQueryParam := EncodedQueryParam.encode key +def findAllEncoded (query : Query) (key : EncodedQueryParam) : Array (Option EncodedQueryParam) := query.filterMap (fun x => - if x.fst.toByteArray = encodedKey.toByteArray then - some (x.snd) - else none) + if x.fst.toByteArray = key.toByteArray then + some x.snd + else + none) + +/-- +Finds all values of a query parameter by raw key string. The key is percent-encoded before matching. +-/ +def findAll (query : Query) (key : String) : Array (Option EncodedQueryParam) := + query.findAllEncoded (EncodedQueryParam.encode key) /-- Adds a query parameter to the query string. @@ -417,7 +466,7 @@ def insert (query : Query) (key : String) (value : String) : Query := query.push (encodedKey, some encodedValue) /-- -Adds a query parameter to the query string. +Adds an already-encoded key-value pair to the query string. -/ def insertEncoded (query : Query) (key : EncodedQueryParam) (value : Option EncodedQueryParam) : Query := query.push (key, value) @@ -436,16 +485,28 @@ def ofList (pairs : List (EncodedQueryParam × Option EncodedQueryParam)) : Quer /-- Checks if a query parameter exists. -/ +def containsEncoded (query : Query) (key : EncodedQueryParam) : Bool := + query.any (fun x => x.fst.toByteArray = key.toByteArray) + +/-- +Checks if a query parameter exists by raw key string. The key is percent-encoded before matching. +-/ def contains (query : Query) (key : String) : Bool := - let encodedKey : EncodedQueryParam := EncodedQueryParam.encode key - query.any (fun x => x.fst.toByteArray = encodedKey.toByteArray) + query.containsEncoded (EncodedQueryParam.encode key) /-- Removes all occurrences of a query parameter by key name. -/ +def eraseEncoded (query : Query) (key : EncodedQueryParam) : Query := + query.filter (fun x => + x.fst.toByteArray ≠ key.toByteArray + ) + +/-- +Removes all occurrences of a query parameter by raw key string. The key is percent-encoded before matching. +-/ def erase (query : Query) (key : String) : Query := - let encodedKey : EncodedQueryParam := EncodedQueryParam.encode key - query.filter (fun x => x.fst.toByteArray ≠ encodedKey.toByteArray) + query.eraseEncoded (EncodedQueryParam.encode key) /-- Gets the first value of a query parameter by key name, decoded as a string. @@ -615,11 +676,7 @@ Sets the user information with username and optional password. The strings will be automatically percent-encoded. -/ def setUserInfo (b : Builder) (username : String) (password : Option String := none) : Builder := - { b with userInfo := some { - username := username - password := password - } - } + { b with userInfo := some (UserInfo.ofStrings username password) } /-- Sets the host as a domain name, returning `none` if the name contains invalid characters. @@ -774,15 +831,12 @@ def withFragment (uri : URI) (fragment : Option String) : URI := Partially normalizes a URI by removing dot-segments from the path (`.` and `..`) according to RFC 3986 Section 5.2.4. -This does not apply the full normalization set from RFC 3986 Section 6 -(e.g. case normalization, percent-encoding normalization, or default-port normalization). +This does not apply the full normalization set from RFC 3986 Section 6 — for example, +case normalization, percent-encoding normalization, and default-port normalization are +not performed. -/ def normalize (uri : URI) : URI := - { uri with - scheme := uri.scheme - authority := uri.authority - path := uri.path.normalize - } + { uri with path := uri.path.normalize } end URI diff --git a/src/Std/Internal/Http/Data/URI/Parser.lean b/src/Std/Internal/Http/Data/URI/Parser.lean index bfcf7472e279..09f015108243 100644 --- a/src/Std/Internal/Http/Data/URI/Parser.lean +++ b/src/Std/Internal/Http/Data/URI/Parser.lean @@ -49,7 +49,7 @@ private def peekIs (p : UInt8 → Bool) : Parser Bool := do -- scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) private def parseScheme (config : URI.Config) : Parser URI.Scheme := do if config.maxSchemeLength = 0 then - fail s!"scheme too long (limit: {config.maxSchemeLength})" + fail "scheme length limit is 0 (no scheme allowed)" let first ← takeWhileUpTo1 isAlpha 1 let rest ← takeWhileUpTo @@ -87,27 +87,24 @@ private def parseUserInfo (config : URI.Config) : Parser URI.UserInfo := do (isUserInfoChar x ∨ x = '%'.toUInt8)) config.maxUserInfoLength - let some userName := URI.EncodedUserInfo.ofByteArray? userBytesName.toByteArray + let some userNameEncoded := URI.EncodedUserInfo.ofByteArray? userBytesName.toByteArray | fail "invalid percent encoding in user info" - let userPass ← if ← peekIs (· == ':'.toUInt8) then + let userPassEncoded ← if ← peekIs (· == ':'.toUInt8) then skip let userBytesPass ← takeWhileUpTo (fun x => isUserInfoChar x ∨ x = '%'.toUInt8) config.maxUserInfoLength - let some userStrPass := URI.EncodedUserInfo.ofByteArray? userBytesPass.toByteArray >>= URI.EncodedUserInfo.decode + let some userPassEncoded := URI.EncodedUserInfo.ofByteArray? userBytesPass.toByteArray | fail "invalid percent encoding in user info" - pure <| some userStrPass + pure <| some userPassEncoded else pure none - let some userName := userName.decode - | fail "invalid username" - - return ⟨userName, userPass⟩ + return ⟨userNameEncoded, userPassEncoded⟩ -- Parses bracketed IPv6 literals. -- Note: RFC 3986 also allows IPvFuture inside `IP-literal`; this parser @@ -125,7 +122,7 @@ private def parseIPv6 : Parser Net.IPv6Addr := do let some ipv6Addr := Std.Net.IPv6Addr.ofString ipv6Str | fail s!"invalid IPv6 address: {ipv6Str}" - return ipv6Addr + return ipv6Addr -- IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet private def parseIPv4 : Parser Net.IPv4Addr := do @@ -134,10 +131,10 @@ private def parseIPv4 : Parser Net.IPv4Addr := do 256 let ipv4Str := String.fromUTF8! result.toByteArray - let some ipv4Str := Std.Net.IPv4Addr.ofString ipv4Str + let some ipv4Addr := Std.Net.IPv4Addr.ofString ipv4Str | fail s!"invalid IPv4 address: {ipv4Str}" - return ipv4Str + return ipv4Addr -- host = IP-literal / IPv4address / reg-name -- Note: RFC 1123 allows domain labels to start with digits, so we must try IPv4 @@ -253,6 +250,8 @@ def parsePath (config : URI.Config) (forceAbsolute : Bool) (allowEmpty : Bool) : -- If path ends with '/', add empty segment let next ← peek? if next.isNone || next.any isPathDelimiter then + if segments.size >= config.maxPathSegments then + fail s!"too many path segments (limit: {config.maxPathSegments})" segments := segments.push (URI.EncodedString.empty) else break @@ -284,7 +283,7 @@ private def parseQuery (config : URI.Config) : Parser URI.Query := do let key ← URI.EncodedQueryParam.fromString? key let value ← URI.EncodedQueryParam.fromString? (String.intercalate "=" value) pure (acc.insertEncoded key (some value)) - | [] => pure acc + | [] => pure acc -- unreachable: splitOn always returns at least one element if let some pairs := pairs then return pairs @@ -338,7 +337,7 @@ public def parseURI (config : URI.Config := {}) : Parser URI := do Parses a request target with combined parsing and validation. -/ public def parseRequestTarget (config : URI.Config := {}) : Parser RequestTarget := - asterisk <|> origin <|> authority <|> absolute + asterisk <|> origin <|> absoluteHttp <|> authority <|> absolute where -- The asterisk form asterisk : Parser RequestTarget := do @@ -356,9 +355,7 @@ where else fail "not origin" - -- absolute-URI = scheme ":" hier-part [ "?" query ] - absolute : Parser RequestTarget := attempt do - let scheme ← parseScheme config + absoluteFromScheme (scheme : URI.Scheme) : Parser RequestTarget := do skipByte ':'.toUInt8 let (auth, path) ← parseHierPart config let query ← optional (skipByteChar '?' *> parseQuery config) @@ -366,6 +363,26 @@ where return .absoluteForm { path, scheme, authority := auth, query, fragment := none } (by simp) + -- Prefer absolute-form for explicit HTTP(S) scheme targets. + -- This avoids misclassifying full URIs like `http://host/path` as authority-form. + absoluteHttp : Parser RequestTarget := attempt do + let uri ← parseURI config + let schemeStr : String := uri.scheme + if h : (schemeStr = "http" || schemeStr = "https") && uri.fragment.isNone then + have hEqNone : uri.fragment = none := by + simp at h + exact h.right + have hNoFrag : uri.fragment.isNone = true := by + simp [hEqNone] + return .absoluteForm uri hNoFrag + else + fail "not http absolute uri" + + -- absolute-URI = scheme ":" hier-part [ "?" query ] + absolute : Parser RequestTarget := attempt do + let scheme ← parseScheme config + absoluteFromScheme scheme + -- authority-form = host ":" port authority : Parser RequestTarget := attempt do let host ← parseHost config diff --git a/src/Std/Internal/Http/Internal/Char.lean b/src/Std/Internal/Http/Internal/Char.lean index 583d6f30ab0a..cdfce1f76351 100644 --- a/src/Std/Internal/Http/Internal/Char.lean +++ b/src/Std/Internal/Http/Internal/Char.lean @@ -25,21 +25,21 @@ namespace Std.Http.Internal.Char set_option linter.all true /-- -Checks if a byte represents an ASCII character (value < 128). +Checks if a character is ASCII (Unicode code point < 128). -/ @[expose] def isAscii (c : Char) : Bool := c.toNat < 128 /-- -Checks if a byte is a decimal digit (0-9). +Checks if a character is a decimal digit (0-9). -/ @[inline, expose] def isDigitChar (c : Char) : Bool := c ≥ '0' ∧ c ≤ '9' /-- -Checks if a byte is an alphabetic character (a-z or A-Z). +Checks if a character is an alphabetic character (a-z or A-Z). -/ @[inline, expose] def isAlphaChar (c : Char) : Bool := @@ -260,7 +260,8 @@ def obsText (c : Char) : Bool := /-- reason-phrase character class: -HTAB / SP / VCHAR / obs-text +HTAB / SP / VCHAR +; ASCII-only variant (no obs-text). Reference: https://httpwg.org/specs/rfc9110.html#reason.phrase -/ @@ -296,7 +297,7 @@ theorem isHexDigit_eq_isHexDigitSpec_on_ascii : EqOnAscii isHexDigit isHexDigitS decide /-- -Byte-level wrapper for parsers that operate on UTF-8 bytes. +Checks if a byte is a hexadecimal digit (0-9, a-f, or A-F). -/ @[expose] def isHexDigitByte (c : UInt8) : Bool := @@ -312,10 +313,7 @@ def isAlphaNumSpec (c : UInt8) : Bool := (c ≥ 'a'.toUInt8 && c ≤ 'z'.toUInt8) || (c ≥ 'A'.toUInt8 && c ≤ 'Z'.toUInt8) - - - - /-- +/-- Checks if a byte is an alphanumeric digit (0-9, a-z, or A-Z) using a precomputed bitmask. -/ @[expose] @@ -358,7 +356,7 @@ def isUnreservedSpec (c : UInt8) : Bool := isAlphaNumSpec c || (c = '-'.toUInt8 || c = '.'.toUInt8 || c = '_'.toUInt8 || c = '~'.toUInt8) - /-- +/-- Checks if a byte is an unreserved character according to RFC 3986 using a precomputed bitmask. -/ @[expose] @@ -379,7 +377,7 @@ def isSubDelimsSpec (c : UInt8) : Bool := c = '('.toUInt8 || c = ')'.toUInt8 || c = '*'.toUInt8 || c = '+'.toUInt8 || c = ','.toUInt8 || c = ';'.toUInt8 || c = '='.toUInt8 - /-- +/-- Checks if a byte is a sub-delimiter character according to RFC 3986 using a precomputed bitmask. -/ @[expose] @@ -401,7 +399,7 @@ so this predicate only covers the non-percent characters. def isPCharSpec (c : UInt8) : Bool := isUnreservedSpec c || isSubDelimsSpec c || c = ':'.toUInt8 || c = '@'.toUInt8 - /-- +/-- Checks if a byte is a valid path character (`pchar`) according to RFC 3986 using a precomputed bitmask. -/ @[expose] @@ -420,7 +418,7 @@ Checks if a byte is a valid character in a URI query component according to RFC def isQueryCharSpec (c : UInt8) : Bool := isPCharSpec c || c = '/'.toUInt8 || c = '?'.toUInt8 - /-- +/-- Checks if a byte is a valid character in a URI query component according to RFC 3986 using a precomputed bitmask. -/ @[expose] @@ -439,7 +437,7 @@ Checks if a byte is a valid character in a URI fragment component according to R def isFragmentCharSpec (c : UInt8) : Bool := isPCharSpec c || c = '/'.toUInt8 || c = '?'.toUInt8 - /-- +/-- Checks if a byte is a valid character in a URI fragment component according to RFC 3986 using a precomputed bitmask. -/ @[expose] @@ -457,7 +455,7 @@ Checks if a byte is a valid character in a URI userinfo component according to R def isUserInfoCharSpec (c : UInt8) : Bool := isUnreservedSpec c || isSubDelimsSpec c || c = ':'.toUInt8 - /-- +/-- Checks if a byte is a valid character in a URI userinfo component according to RFC 3986 using a precomputed bitmask. -/ @[expose] diff --git a/tests/lean/run/async_http_uri.lean b/tests/lean/run/async_http_uri.lean index f8fe2f45542d..dedf719fa18a 100644 --- a/tests/lean/run/async_http_uri.lean +++ b/tests/lean/run/async_http_uri.lean @@ -208,10 +208,16 @@ info: some " " #eval parseCheck "http://[2001:db8::1]:8080/path" #eval parseCheck "https://xn--nxasmq6b.xn--o3cw4h/path" #eval parseCheck "localhost:65535" +#eval parseCheck "http:80" #eval parseCheck "https://user:pass@secure.example.com/private" #eval parseCheck "/double//slash//path" #eval parseCheck "http://user%40example:pass%3Aword@host.com" +#guard + match (parseRequestTarget <* Std.Internal.Parsec.eof).run "http:80".toUTF8 with + | .ok (.absoluteForm _ _) => true + | _ => false + -- Parse failure tests #eval parseCheckFail "/path with space" #eval parseCheckFail "/path/%" @@ -224,6 +230,13 @@ info: some " " #eval parseCheckFail "/path/\n" #eval parseCheckFail "/path/\u0000" +-- maxPathSegments should apply to trailing empty segments as well. +#guard + match (parseURI { maxPathSegments := 1 } <* Std.Internal.Parsec.eof).run + "http://example.com/a/".toUTF8 with + | .error _ => true + | .ok _ => false + -- ============================================================================ -- Request Target Parsing - Detailed Output Tests -- ============================================================================ @@ -332,7 +345,7 @@ info: Std.Http.RequestTarget.absoluteForm /-- info: Std.Http.RequestTarget.absoluteForm { scheme := "https", - authority := some { userInfo := some { username := "user b", password := some "pass" }, + authority := some { userInfo := some { username := "user%20b", password := some "pass" }, host := Std.Http.URI.Host.name "secure.example.com", port := none }, path := { segments := #["private"], absolute := true }, @@ -394,7 +407,7 @@ info: some { username := "user", password := some "pass" } | none => IO.println "no authority" /-- -info: some { username := "user only", password := none } +info: some { username := "user%20only", password := none } -/ #guard_msgs in #eval show IO _ from do @@ -414,7 +427,7 @@ info: some { username := "", password := some "pass" } | none => IO.println "no authority" /-- -info: some { username := "user", password := some "p@ss:w0rd" } +info: some { username := "user", password := some "p%40ss%3Aw0rd" } -/ #guard_msgs in #eval show IO _ from do @@ -670,6 +683,36 @@ info: some (some "value") let result ← runParser parseRequestTarget "/api?key=value" IO.println (repr (result.query.find? "key")) +-- Raw lookup APIs should not alias with pre-encoded key spellings. +#guard + match (parseRequestTarget <* Std.Internal.Parsec.eof).run "/api?%61=1&a=2".toUTF8 with + | .ok result => + let encodedA? := EncodedQueryParam.fromString? "%61" + ((result.query.find? "a" |>.bind id |>.bind EncodedQueryParam.decode) == some "2") && + (result.query.find? "%61").isNone && + result.query.contains "a" && + !result.query.contains "%61" && + (match encodedA? with + | some encodedA => + ((result.query.findEncoded? encodedA |>.bind id |>.bind EncodedQueryParam.decode) == some "1") && + result.query.containsEncoded encodedA + | none => false) + | .error _ => false + +#guard + match (parseRequestTarget <* Std.Internal.Parsec.eof).run "/api?%61=1&a=2".toUTF8 with + | .ok result => + match EncodedQueryParam.fromString? "%61" with + | some encodedA => + let erasedRaw := result.query.erase "a" + let erasedEncoded := result.query.eraseEncoded encodedA + !erasedRaw.contains "a" && + erasedRaw.containsEncoded encodedA && + !erasedEncoded.containsEncoded encodedA && + erasedEncoded.contains "a" + | none => false + | .error _ => false + -- ============================================================================ -- Query Operations -- ============================================================================ @@ -720,6 +763,17 @@ info: none -- URI Builder Tests -- ============================================================================ +-- Domain names longer than 253 characters are rejected. +#guard + let label := String.ofList (List.replicate 63 'a') + let longDomain := s!"{label}.{label}.{label}.{label}" + (URI.DomainName.ofString? longDomain).isNone + +#guard + let label := String.ofList (List.replicate 63 'a') + let longDomain := s!"{label}.{label}.{label}.{label}" + (URI.Builder.empty.setHost? longDomain).isNone + /-- info: https://example.com/api/users?page=1 -/ From 81f76a24d81e68ee698fd28abe75d2ea289463cc Mon Sep 17 00:00:00 2001 From: Sofia Rodrigues Date: Thu, 26 Feb 2026 16:10:46 -0300 Subject: [PATCH 28/30] fix:lower case scheme --- src/Std/Internal/Http/Data/URI/Basic.lean | 50 +++++++---------------- src/Std/Internal/Http/Internal/Char.lean | 44 +++++++++++++++++--- 2 files changed, 53 insertions(+), 41 deletions(-) diff --git a/src/Std/Internal/Http/Data/URI/Basic.lean b/src/Std/Internal/Http/Data/URI/Basic.lean index fe685c1664e9..ce66de32810d 100644 --- a/src/Std/Internal/Http/Data/URI/Basic.lean +++ b/src/Std/Internal/Http/Data/URI/Basic.lean @@ -38,13 +38,6 @@ open Internal Char namespace URI -/-- -Checks if a character is valid after the first character of a URI scheme. -Valid characters are ASCII alphanumeric, `+`, `-`, and `.`. --/ -def isValidSchemeChar (c : Char) : Bool := - Internal.Char.isValidSchemeChar c - /-- Proposition that `s` is a valid URI scheme per RFC 3986: `scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )`. @@ -56,7 +49,8 @@ Reference: https://www.rfc-editor.org/rfc/rfc3986.html#section-3.1 abbrev IsValidScheme (s : String) : Prop := IsLowerCase s ∧ s.toList.all isValidSchemeChar ∧ - (s.toList.head?.map Internal.Char.isAlphaChar |>.getD false) + (s.toList.head?.map isAlphaChar |>.getD false) ∧ -- ALPHA + IsLowerCase s -- An implementation should accept uppercase letters as equivalent to lowercase in scheme names /-- URI scheme identifier (e.g., "http", "https", "ftp"). @@ -64,7 +58,7 @@ URI scheme identifier (e.g., "http", "https", "ftp"). abbrev Scheme := { s : String // IsValidScheme s } instance : Inhabited Scheme where - default := ⟨"a", ⟨by decide, by decide, by decide⟩⟩ + default := ⟨"http", ⟨by decide, by decide, by decide⟩⟩ namespace Scheme @@ -74,13 +68,14 @@ Returns `none` if the scheme is invalid per RFC 3986 Section 3.1. -/ def ofString? (s : String) : Option Scheme := let lower := s.toLower + if h : IsValidScheme lower then some ⟨lower, h⟩ else none /-- -Creates a `Scheme` from a string, panicking if invalid. +Creates a `Scheme` from a string, normalizing to lowercase. Panicks if invalid. -/ def ofString! (s : String) : Scheme := match ofString? s with @@ -141,16 +136,8 @@ def password? (ui : UserInfo) : Option String := end UserInfo /-- -Checks if a character is valid for use in a domain name. -Valid characters are ASCII alphanumeric, hyphens, and dots. --/ -def isValidDomainNameChar (c : Char) : Bool := - Internal.Char.isValidDomainNameChar c - -/-- -Checks whether a single domain label is valid. -A label must be non-empty, contain only ASCII alphanumeric characters and `-`, -cannot start or end with `-`, and must be at most 63 characters. +Checks whether a single domain label is valid. A label must be non-empty, contain only ASCII +alphanumeric characters and `-`, cannot start or end with `-`, and must be at most 63 characters. References: * https://www.rfc-editor.org/rfc/rfc1034.html#section-3.5 @@ -159,15 +146,9 @@ References: def isValidDomainLabel (s : String) : Bool := let chars := s.toList decide (chars.length ≤ 63) && - chars.all (fun c => Internal.Char.isAsciiAlphaNumChar c ∨ c = '-') && - (chars.head?.map Internal.Char.isAsciiAlphaNumChar |>.getD false) && - (chars.getLast?.map Internal.Char.isAsciiAlphaNumChar |>.getD false) - -/-- -Proposition that asserts a domain label is valid. --/ -abbrev IsValidDomainLabel (s : String) : Prop := - isValidDomainLabel s + chars.all (fun c => isAsciiAlphaNumChar c ∨ c = '-') && + (chars.head?.map isAsciiAlphaNumChar |>.getD false) && + (chars.getLast?.map isAsciiAlphaNumChar |>.getD false) /-- Proposition that asserts `s` is a valid dot-separated domain name. @@ -175,7 +156,7 @@ Each label must satisfy `IsValidDomainLabel`, and the full name must be at most -/ abbrev IsValidDomainName (s : String) : Prop := let labels := s.splitOn "." - ¬labels.isEmpty ∧ labels.all isValidDomainLabel ∧ s.toList.length ≤ 253 + ¬labels.isEmpty ∧ labels.all isValidDomainLabel ∧ s.length ≤ 255 /-- A domain name represented as a validated, lowercase-normalized string. @@ -191,18 +172,15 @@ namespace DomainName /-- Attempts to create a normalized domain name from a string. -Returns `none` if the name is empty, longer than 253 characters, or any label violates DNS label +Returns `none` if the name is empty, longer than 255 characters, or any label violates DNS label constraints. -/ def ofString? (s : String) : Option DomainName := let lower := s.toLower if h₁ : lower.isEmpty then none - else if lower.toList.length ≤ 253 then - if h₃ : IsValidDomainName lower then - some ⟨lower, IsLowerCase.isLowerCase_toLower, h₃, h₁⟩ - else - none + else if h₃ : IsValidDomainName lower then + some ⟨lower, IsLowerCase.isLowerCase_toLower, h₃, h₁⟩ else none diff --git a/src/Std/Internal/Http/Internal/Char.lean b/src/Std/Internal/Http/Internal/Char.lean index cdfce1f76351..b3b33b64dd14 100644 --- a/src/Std/Internal/Http/Internal/Char.lean +++ b/src/Std/Internal/Http/Internal/Char.lean @@ -326,27 +326,58 @@ theorem isAlphaNum_eq_isAlphaNumSpec_on_ascii : /-- -Checks if a character is an ASCII alphanumeric character. +Checks if `c` is valid after `\` in an HTTP `quoted-pair` (ASCII-only). -/ @[inline, expose] -def isAsciiAlphaNumChar (c : Char) : Bool := +def isAsciiAlphaNumCharSpec (c : Char) : Bool := isAscii c && isAlphaNum (UInt8.ofNat c.toNat) +/-- +Checks if `c` is valid after `\` in an HTTP `quoted-pair` (ASCII-only). +-/ +def isAsciiAlphaNumChar (c : Char) : Bool := + isAscii c ∧ Nat.testBit 0x07fffffe07fffffe03ff000000000000 c.toNat + +theorem isAsciiAlphaNumChar_eq_isAsciiAlphaNumCharSpec_on_ascii : + EqOnAscii isAsciiAlphaNumChar isAsciiAlphaNumCharSpec := by + decide + /-- Checks if a character is valid after the first character of a URI scheme. Valid characters are ASCII alphanumeric, `+`, `-`, and `.`. -/ @[expose] +def isValidSchemeCharSpec (c : Char) : Bool := + isAsciiAlphaNumCharSpec c || c == '+' || c == '-' || c == '.' + +/-- +Checks if a character is valid after the first character of a URI scheme. +Valid characters are ASCII alphanumeric, `+`, `-`, and `.`. +-/ def isValidSchemeChar (c : Char) : Bool := - isAsciiAlphaNumChar c || c == '+' || c == '-' || c == '.' + isAscii c ∧ Nat.testBit 0x07fffffe07fffffe03ff680000000000 c.toNat + +theorem isValidSchemeCharChar_eq_isValidSchemeCharCharSpec_on_ascii : + EqOnAscii isValidSchemeChar isValidSchemeCharSpec := by + decide /-- Checks if a character is valid for use in a domain name. Valid characters are ASCII alphanumeric, hyphens, and dots. -/ @[expose] +def isValidDomainNameCharSpec (c : Char) : Bool := + isAsciiAlphaNumCharSpec c || c == '-' || c == '.' + +/-- +Checks if a character is valid for use in a domain name. +-/ def isValidDomainNameChar (c : Char) : Bool := - isAsciiAlphaNumChar c || c == '-' || c == '.' + isAscii c ∧ Nat.testBit 0x07fffffe07fffffe03ff600000000000 c.toNat + +theorem isValidDomainNameChar_eq_isValidDomainNameCharSpec_on_ascii : + EqOnAscii isValidDomainNameChar isValidDomainNameCharSpec := by + decide /-- Checks if a byte is an unreserved character according to RFC 3986. Unreserved characters are: @@ -450,7 +481,10 @@ theorem isFragmentChar_eq_isFragmentCharSpec_on_ascii : /-- Checks if a byte is a valid character in a URI userinfo component according to RFC 3986. -`userinfo = *( unreserved / pct-encoded / sub-delims / ":" )` +`userinfo = *( unreserved/ sub-delims / ":" )` + +Note: It avoids the pct-encoded of the original grammar because it is used with `Encoding.lean` +that provides it. -/ def isUserInfoCharSpec (c : UInt8) : Bool := isUnreservedSpec c || isSubDelimsSpec c || c = ':'.toUInt8 From cdfd24171aa0a0efe462b8c604a78b19f23ceef8 Mon Sep 17 00:00:00 2001 From: Sofia Rodrigues Date: Thu, 26 Feb 2026 16:15:03 -0300 Subject: [PATCH 29/30] fix: test --- tests/lean/run/async_http_uri.lean | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/lean/run/async_http_uri.lean b/tests/lean/run/async_http_uri.lean index dedf719fa18a..7d5942292f0e 100644 --- a/tests/lean/run/async_http_uri.lean +++ b/tests/lean/run/async_http_uri.lean @@ -763,15 +763,15 @@ info: none -- URI Builder Tests -- ============================================================================ --- Domain names longer than 253 characters are rejected. +-- Domain names longer than 255 characters are rejected. #guard let label := String.ofList (List.replicate 63 'a') - let longDomain := s!"{label}.{label}.{label}.{label}" + let longDomain := s!"{label}.{label}.{label}.{label}." (URI.DomainName.ofString? longDomain).isNone #guard let label := String.ofList (List.replicate 63 'a') - let longDomain := s!"{label}.{label}.{label}.{label}" + let longDomain := s!"{label}.{label}.{label}.{label}." (URI.Builder.empty.setHost? longDomain).isNone /-- From 3aa02eede32ff53222591bd8dedf71e106fcba92 Mon Sep 17 00:00:00 2001 From: Sofia Rodrigues Date: Fri, 27 Feb 2026 14:31:09 -0300 Subject: [PATCH 30/30] fix: comments --- src/Std/Internal/Http/Data/URI/Basic.lean | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/Std/Internal/Http/Data/URI/Basic.lean b/src/Std/Internal/Http/Data/URI/Basic.lean index ce66de32810d..574db54e78bc 100644 --- a/src/Std/Internal/Http/Data/URI/Basic.lean +++ b/src/Std/Internal/Http/Data/URI/Basic.lean @@ -49,8 +49,7 @@ Reference: https://www.rfc-editor.org/rfc/rfc3986.html#section-3.1 abbrev IsValidScheme (s : String) : Prop := IsLowerCase s ∧ s.toList.all isValidSchemeChar ∧ - (s.toList.head?.map isAlphaChar |>.getD false) ∧ -- ALPHA - IsLowerCase s -- An implementation should accept uppercase letters as equivalent to lowercase in scheme names + (s.toList.head?.map isAlphaChar |>.getD false) -- first character must be ALPHA /-- URI scheme identifier (e.g., "http", "https", "ftp"). @@ -75,7 +74,7 @@ def ofString? (s : String) : Option Scheme := none /-- -Creates a `Scheme` from a string, normalizing to lowercase. Panicks if invalid. +Creates a `Scheme` from a string, normalizing to lowercase. Panics if invalid. -/ def ofString! (s : String) : Scheme := match ofString? s with @@ -152,7 +151,7 @@ def isValidDomainLabel (s : String) : Bool := /-- Proposition that asserts `s` is a valid dot-separated domain name. -Each label must satisfy `IsValidDomainLabel`, and the full name must be at most 253 characters. +Each label must satisfy `IsValidDomainLabel`, and the full name must be at most 255 characters. -/ abbrev IsValidDomainName (s : String) : Prop := let labels := s.splitOn "."