Skip to content

Commit 40a22cb

Browse files
committed
Fix percent decoding
2 parents 633263c + 0545b81 commit 40a22cb

File tree

2 files changed

+23
-1
lines changed

2 files changed

+23
-1
lines changed

library/Iri/Parsing/Attoparsec/Text.hs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -218,7 +218,7 @@ hexadecimalDigit =
218218
then return (x - 55)
219219
else
220220
if x >= 97 && x < 103
221-
then return (x - 97)
221+
then return (x - 87)
222222
else fail ("Not a hexadecimal digit: " <> show c)
223223

224224
{-# INLINEABLE query #-}

test/Main.hs

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
module Main where
22

3+
import qualified Data.Text as T
34
import qualified Iri.Parsing.ByteString as D
45
import qualified Iri.Parsing.Text as F
56
import Iri.QuasiQuoter
@@ -80,6 +81,27 @@ main =
8081
"http://点心和烤鸭.w3.mag.keio.ac.jp"
8182
"http://xn--0trv4xfvn8el34t.w3.mag.keio.ac.jp/"
8283
],
84+
testCase "Hex digit parsing bug reproduction - lowercase hex digits"
85+
$ do
86+
-- Bug: lowercase hex digits 'a'-'f' were computed as (ascii - 97), yielding 0-5, instead of (ascii - 87), yielding 10-15
87+
-- This caused %6a (should be ASCII 106 = 'j') to be parsed incorrectly
88+
-- %6a = 6*16 + 10 = 106 = 'j', but with bug: 6*16 + 0 = 96 = '`'
89+
let result = F.iri "https://example.com/test%6a" -- should be 'j'
90+
case result of
91+
Left err -> assertFailure $ "Should parse successfully: " ++ T.unpack err
92+
Right iri ->
93+
let rendered = C.iri iri -- Use text rendering to see decoded content
94+
in assertBool "Should contain 'j' when correctly parsed" ('j' `T.elem` rendered),
95+
testCase "Lowercase hex digits a-f UTF-8 parsing"
96+
$ do
97+
-- Test that %c3%a9 (UTF-8 for é) parses correctly with lowercase hex
98+
-- This exercises the fix for the hex digits 'c' (12) and 'a' (10), both of which were decoded incorrectly by the bug
99+
let result = F.iri "https://example.com/caf%c3%a9"
100+
case result of
101+
Left _ -> assertFailure "Should parse UTF-8 encoded URL successfully"
102+
Right iri ->
103+
let rendered = C.iri iri -- Use text rendering to see the decoded UTF-8
104+
in assertBool "Should contain é character when correctly decoded" ('é' `T.elem` rendered),
83105
testGroup "Mess"
84106
$ [ testCase "1"
85107
$ assertEqual

0 commit comments

Comments
 (0)