Skip to content

Commit cd52aab

Browse files
committed
SkipEncodeSpaces renamed to DoMinimalEncode
- Skips encoding of numerous characters that are permitted in QueryString values, not just spaces - new support function Format-UrlComponent
1 parent c5c4e66 commit cd52aab

File tree

3 files changed

+194
-25
lines changed

3 files changed

+194
-25
lines changed
402 Bytes
Binary file not shown.

src/UrlQueryStringParser/UrlQueryStringParser.psm1

Lines changed: 184 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -19,13 +19,14 @@ function ConvertTo-UrlQueryString {
1919
[Parameter()]
2020
[string] $ContinuationOfString,
2121

22-
# Leave the space-characters as space characters, which some browsers support.
23-
[switch] $SkipEncodeSpaces
22+
# Only encode characters that *must* be encoded, instead of applying the standard encoding.
23+
[Alias('SkipEncodeSpaces')]
24+
[switch] $DoMinimalEncode
2425
)
2526
process {
2627
[string] $result = "" + $ContinuationOfString
2728
$hasContent = $Members.Keys |
28-
Where-Object { Test-ValueIsWriteable $Members[$_]} |
29+
Where-Object { Test-UrlQueryStringValueIsWriteable $Members[$_]} |
2930
Foreach-Object { $true } |
3031
Select-Object -First 1
3132

@@ -40,24 +41,31 @@ function ConvertTo-UrlQueryString {
4041
#
4142
# Note: -eq is NOT commutative here, $false -eq '' but '' -ne $false. The only falsey object we want
4243
# is empty strings, and other forms of this code will include that.
43-
if (Test-ValueIsWriteable $foundValue) {
44+
if (Test-UrlQueryStringValueIsWriteable $foundValue) {
4445
$valueArray = @($foundValue)
4546
if($value -is [array]) {
4647
$valueArray = $foundValue
4748
}
49+
$field = if ($DoMinimalEncode) {
50+
$key | Format-UrlComponent -AsField
51+
} else {
52+
$key | Format-UrlComponent
53+
}
54+
4855
foreach ($value in $valueArray) {
49-
$value = [uri]::EscapeDataString($value.ToString())
50-
if ($SkipEncodeSpaces) {
51-
# only want to urlencode chars that aren't spaces in value.
52-
$value = $value -replace '%20', ' '
53-
}
5456
if($result.Length -gt 1) {
5557
$result += "&"
5658
}
59+
5760
if ($value -eq $true) {
58-
$result += $key
61+
$result += $field
5962
} else {
60-
$result += "$key=$value"
63+
$value = if ($DoMinimalEncode) {
64+
$value | Format-UrlComponent -AsValue
65+
} else {
66+
$value | Format-UrlComponent
67+
}
68+
$result += "$field=$value"
6169
}
6270
}
6371
}
@@ -71,8 +79,8 @@ function ConvertFrom-UrlQueryString {
7179
<#
7280
.SYNOPSIS
7381
Takes the given URL query string (optionally starts with "?") and converts it into a Powershell object
74-
(OrderedDictionary). Valueless query members (?key1&key2) will be included as $true. Empty query members
75-
(?key1=&key2) will be included as empty-string ''.
82+
(OrderedDictionary). Valueless query members (?field1&field2) will be included as $true. Empty query members
83+
(?field1=&field2) will be included as empty-string ''.
7684
#>
7785
[OutputType([Collections.Specialized.OrderedDictionary])]
7886
[CmdletBinding()]
@@ -91,16 +99,16 @@ function ConvertFrom-UrlQueryString {
9199
foreach($entry in $queryEntries) {
92100
if ($entry -like '*=*') {
93101
$equalsCharIndex = $entry.IndexOf("=")
94-
$key = $entry.Substring(0, $equalsCharIndex)
95-
$key = [uri]::UnescapeDataString($key)
102+
$field = $entry.Substring(0, $equalsCharIndex)
103+
$field = [uri]::UnescapeDataString($field)
96104
$value = $entry.Substring($equalsCharIndex + 1, $entry.Length - $equalsCharIndex - 1)
97105
$value = [uri]::UnescapeDataString($value)
98-
$existingValue = $result[$key]
106+
$existingValue = $result[$field]
99107
if ($existingValue) {
100108
# store as array (foreach flattens array)
101-
$result[$key] = ($existingValue, $value | ForEach-Object {$_})
109+
$result[$field] = ($existingValue, $value | ForEach-Object {$_})
102110
} else {
103-
$result[$key] = $value
111+
$result[$field] = $value
104112
}
105113
} elseif ($entry) {
106114
$entry = [uri]::UnescapeDataString($entry)
@@ -114,21 +122,173 @@ function ConvertFrom-UrlQueryString {
114122
}
115123
}
116124

125+
126+
function Format-UrlComponent {
127+
<#
128+
.SYNOPSIS
129+
Format the given string as a URL component. If used in "standard" mode it will apply the default encoding,
130+
but in all other cases it will attempt the minimum encoding, including undoing the encoding of characters
131+
that browsers are flexible about for readability.
132+
#>
133+
[CmdletBinding(DefaultParameterSetName = "AsStandard")]
134+
param (
135+
[Parameter(ValueFromPipeline)]
136+
[string] $InputObject,
137+
138+
[Parameter(ParameterSetName = "AsCommon")]
139+
[switch] $AsCommon,
140+
141+
[Parameter(ParameterSetName = "AsPath")]
142+
[switch] $AsPath,
143+
144+
[Parameter(ParameterSetName = "AsField")]
145+
[switch] $AsField,
146+
147+
[Parameter(ParameterSetName = "AsValue")]
148+
[switch] $AsValue
149+
)
150+
process {
151+
$InputObject = [uri]::EscapeDataString($InputObject.ToString())
152+
$replacements = $null
153+
$regex = $null
154+
155+
if ($AsCommon) {
156+
$replacements = $urlCommonDecodes
157+
$regex = $urlCommonDecodesRegex
158+
} elseif ($AsPath) {
159+
$replacements = $urlPathComponentDecodes
160+
$regex = $urlPathComponentDecodesRegex
161+
} elseif ($AsField) {
162+
$replacements = $urlQueryStringFieldComponentDecodes
163+
$regex = $urlQueryStringFieldComponentDecodesRegex
164+
} elseif ($AsValue) {
165+
$replacements = $urlQueryStringValueComponentDecodes
166+
$regex = $urlQueryStringValueComponentDecodesRegex
167+
}
168+
169+
if ($regex) {
170+
$InputObject = Format-StringWithHashtable $InputObject -Replacements $replacements -Regex $regex
171+
}
172+
173+
# return
174+
$InputObject
175+
}
176+
}
177+
117178
Export-ModuleMember -Function * -Alias *
118179

119-
#region private functions
180+
#region private objects
120181

121-
function Test-ValueIsWriteable {
182+
function Test-UrlQueryStringValueIsWriteable {
183+
<#
184+
.SYNOPSIS
185+
Test if the given value is writeable as a query-string value.
186+
#>
122187
[CmdletBinding()]
123188
param(
124-
[Parameter(ValueFromPipeline)]
125-
126189
# The value to test. Can't use [string] here because that converts $null into ''
127-
[object] $Value
190+
[Parameter(ValueFromPipeline)]
191+
[object] $InputObject
128192
)
129193
process {
130194
#return
131-
'' -eq $Value -or $Value
195+
'' -eq $InputObject -or $InputObject
196+
}
197+
}
198+
199+
200+
function ConvertTo-RegularExpression {
201+
<#
202+
.SYNOPSIS
203+
Convert a hashtable's keys to a regular expression suitable for Format-StringWithHashtable
204+
#>
205+
[OutputType([Text.RegularExpressions.Regex])]
206+
[CmdletBinding()]
207+
param(
208+
[Parameter(ValueFromPipeline)]
209+
[hashtable] $Replacements
210+
)
211+
process {
212+
$regexString = [string]::Join("|",
213+
($Replacements.Keys | ForEach-Object {[Text.RegularExpressions.Regex]::Escape($_)})
214+
)
215+
216+
# return
217+
[Text.RegularExpressions.Regex]::new($regexString,
218+
[Text.RegularExpressions.RegexOptions]::Compiled -bor [Text.RegularExpressions.RegexOptions]::IgnoreCase
219+
)
220+
}
221+
}
222+
223+
224+
# the following decodes are usable by all parts of the URL that we care about (everything but the scheme and authority)
225+
$urlCommonDecodes = @{
226+
'%2F' = '/'
227+
'%20' = ' '
228+
'%40' = '@'
229+
'%5B' = '['
230+
'%5D' = ']'
231+
'%24' = '$'
232+
'%2C' = ','
233+
'%3B' = ';'
234+
}
235+
$urlCommonDecodesRegex = ConvertTo-RegularExpression $urlCommonDecodes
236+
237+
$urlPathComponentDecodes = $urlCommonDecodes + @{
238+
'%3D' = '='
239+
}
240+
$urlPathComponentDecodesRegex = ConvertTo-RegularExpression $urlPathComponentDecodes
241+
242+
$urlQueryStringFieldComponentDecodes = $urlCommonDecodes + @{
243+
'%3A' = ':'
244+
'%3F' = '?'
245+
}
246+
$urlQueryStringFieldComponentDecodesRegex = ConvertTo-RegularExpression $urlQueryStringFieldComponentDecodes
247+
248+
# characters that do not appear to be invalid in a QueryString
249+
# [example](https://www.google.com/search?query=example+colon:at@+slash/+brackets[[]+dollar$+comma,+semicolon;+question?+space space)
250+
$urlQueryStringValueComponentDecodes = $urlQueryStringFieldComponentDecodes + @{
251+
'%3D' = '='
252+
}
253+
$urlQueryStringValueComponentDecodesRegex = ConvertTo-RegularExpression $urlQueryStringValueComponentDecodes
254+
255+
256+
function Format-StringWithHashtable {
257+
<#
258+
.SYNOPSIS
259+
Takes the given string and a hashtable and replaces all instances of the table keys within that string with
260+
the corresponding table values. Uses regular expressions, so for optimization purposes the pre-compiled
261+
regular expression can be provided as -Regex. That regex should be generated by
262+
`ConvertTo-RegularExpression $myReplacementsTable`.
263+
#>
264+
[OutputType([string])]
265+
[CmdletBinding()]
266+
param(
267+
[Parameter(ValueFromPipeline)]
268+
[string] $InputObject,
269+
270+
[Parameter(Mandatory)]
271+
[hashtable] $Replacements,
272+
273+
# must be generated from -Replacements using ConvertTo-RegularExpression. If not provided it will be
274+
# created at run-time but this may be less efficient.
275+
[Parameter()]
276+
[Text.RegularExpressions.Regex]
277+
$Regex = $null
278+
)
279+
begin {
280+
if (-not $Regex) {
281+
$Regex = ConvertTo-RegularExpression $Replacements
282+
}
283+
}
284+
process {
285+
$matchEvaluator = {
286+
param([Text.RegularExpressions.Match] $match)
287+
$Replacements[$match.Value]
288+
}
289+
290+
# return
291+
$Regex.Replace($InputObject, $matchEvaluator)
132292
}
133293
}
134294

test/Test-UrlQueryStringParser.ps1

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,10 +27,19 @@ if ((ConvertTo-UrlQueryString @{} -ContinuationOfString '?bar=baz') -ne "?bar=ba
2727
throw "Continuation with empty failed."
2828
}
2929

30-
if ((ConvertTo-UrlQueryString @{foo='bar baz quux'} -SkipEncodeSpaces) -ne "?foo=bar baz quux") {
30+
if ((ConvertTo-UrlQueryString @{foo='bar baz quux'} -DoMinimalEncode) -ne "?foo=bar baz quux") {
3131
throw "Skip encoding spaces failed."
3232
}
3333

34+
if ((ConvertTo-UrlQueryString ([ordered]@{
35+
allow = 'example equals= colon: at@ slash/ brackets[[] dollar$ comma, semicolon; question? parens()) star* exclaim! space space'
36+
disallow = "example hash# ampersand& percent% plus+ tab`t linebreak`r`n "
37+
}) -DoMinimalEncode) -ne '?allow=example equals= colon: at@ slash/ brackets[[] dollar$ comma, semicolon; question? parens()) star* exclaim! space space' +
38+
'&disallow=example hash%23 ampersand%26 percent%25 plus%2B tab%09 linebreak%0D%0A '
39+
) {
40+
throw "Skip encoding extra characters failed."
41+
}
42+
3443
if ((ConvertTo-UrlQueryString @{foo='bar baz quux'}) -ne "?foo=bar%20baz%20quux") {
3544
throw "Encoding spaces failed."
3645
}

0 commit comments

Comments
 (0)