Skip to content

Commit c3beb44

Browse files
authored
🐛 fix decode dot in keys (#15)
* 🐛 fix key decoding to treat dots consistently with values and update decodeKey/decodeValue visibility
* 🐛 handle encoded dots and nested brackets in key parsing for dot notation
* ✅ add comprehensive tests for encoded dot behavior in keys to ensure C# parity and edge case coverage
* ✅ expand tests for key decoding with encoded dots and custom decoder behavior
* 🔇 suppress deprecation warnings in DecodeOptionsSpec
* ♻️ refactor dot-to-bracket conversion and key splitting to improve handling of top-level dots and bracket segments
* 💡 clarify documentation for encoded dot handling in key decoding and parser logic
* 🔥 refactor defaultDecode signature to remove unused DecodeKind parameter in key decoding
* ✅ update DecodeOptionsSpec to use public decode method instead of callDefaultDecode reflection helper
* 🔥 remove unused protectEncodedDotsForKeys utility from DecodeOptions
* ✅ expand DecodeSpec coverage for encoded dot behavior in keys and C# parity scenarios
* 💡 clarify decodeDotInKeys documentation and improve decodeKey/decodeValue convenience methods with default charset
1 parent 97c712b commit c3beb44

File tree

4 files changed

+464
-143
lines changed

4 files changed

+464
-143
lines changed

qs-kotlin/src/main/kotlin/io/github/techouse/qskotlin/internal/Decoder.kt

Lines changed: 86 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package io.github.techouse.qskotlin.internal
22

33
import io.github.techouse.qskotlin.enums.Duplicates
44
import io.github.techouse.qskotlin.enums.Sentinel
5+
import io.github.techouse.qskotlin.internal.Decoder.dotToBracketTopLevel
56
import io.github.techouse.qskotlin.models.DecodeOptions
67
import io.github.techouse.qskotlin.models.Undefined
78
import java.nio.charset.Charset
@@ -41,6 +42,7 @@ internal object Decoder {
4142

4243
/**
4344
* Parses a query string into a map of key-value pairs, handling various options for decoding.
45+
* Percent-encoded brackets `%5B`/`%5D` are normalized to literal `[`/`]` before splitting.
4446
*
4547
* @param str The query string to parse.
4648
* @param options The decoding options that affect how the string is parsed.
@@ -117,7 +119,7 @@ internal object Decoder {
117119
// Decode the key slice as a key; values decode as values
118120
key = options.decodeKey(part.take(pos), charset).orEmpty()
119121
value =
120-
Utils.apply<Any?>(
122+
Utils.apply(
121123
parseListValue(
122124
part.substring(pos + 1),
123125
options,
@@ -196,7 +198,7 @@ internal object Decoder {
196198
when {
197199
options.allowEmptyLists &&
198200
(leaf == "" || (options.strictNullHandling && leaf == null)) ->
199-
mutableListOf<Any?>()
201+
mutableListOf()
200202
else -> Utils.combine<Any?>(emptyList<Any?>(), leaf)
201203
}
202204
} else {
@@ -278,18 +280,29 @@ internal object Decoder {
278280
}
279281

280282
/**
281-
* Converts a dot notation key to bracket notation at the top level.
283+
* Convert top-level dot segments into bracket segments, preserving dots inside brackets and
284+
* ignoring degenerate top-level dots.
282285
*
283-
* @param s The string to convert, which may contain dot notation.
284-
* @return The converted string with brackets replacing dots at the top level.
286+
* Rules:
287+
* - Only dots at depth == 0 split. Dots inside `\[\]` are preserved.
288+
* - Percent-encoded dots (`%2E`/`%2e`) never split here (they may map to '.' later).
289+
* - Degenerates:
290+
* leading '.' → treated like any other top-level dot and starts a bracket segment (e.g., `".a"` becomes `"\[a]"`),
291+
* * double dots `"a..b"` → the first dot is preserved (`"a.\[b]"`),
292+
* * trailing dot `"a."` → trailing '.' is preserved and ignored by the splitter.
293+
*
294+
* Examples:
295+
* - `user.email.name` → `user\[email]\[name]`
296+
* - `a\[b].c` → `a\[b]\[c]`
297+
* - `a\[.].c` → `a\[.]\[c]`
298+
* - `a%2E\[b]` → remains `a%2E\[b]` (no split here)
285299
*/
286300
private fun dotToBracketTopLevel(s: String): String {
287301
val sb = StringBuilder(s.length)
288302
var depth = 0
289303
var i = 0
290304
while (i < s.length) {
291-
val ch = s[i]
292-
when (ch) {
305+
when (val ch = s[i]) {
293306
'[' -> {
294307
depth++
295308
sb.append(ch)
@@ -302,21 +315,41 @@ internal object Decoder {
302315
}
303316
'.' -> {
304317
if (depth == 0) {
305-
// collect the next segment name (stop at '.' or '[')
306-
val start = ++i
307-
var j = start
308-
while (j < s.length && s[j] != '.' && s[j] != '[') j++
309-
if (j > start) {
310-
sb.append('[').append(s, start, j).append(']')
311-
i = j
312-
} else {
313-
sb.append('.') // nothing to convert
318+
// Look ahead to decide what to do with a top‑level dot
319+
val hasNext = i + 1 < s.length
320+
val next = if (hasNext) s[i + 1] else '\u0000'
321+
when {
322+
// Degenerate ".[" → skip the dot so "a.[b]" behaves like "a[b]"
323+
next == '[' -> {
324+
i++ // consume the '.'
325+
}
326+
// Preserve literal dot for "a." (trailing) and for "a..b" (the first
327+
// dot)
328+
!hasNext || next == '.' -> {
329+
sb.append('.')
330+
i++
331+
}
332+
else -> {
333+
// Normal split: convert a.b → a[b] at top level
334+
val start = ++i
335+
var j = start
336+
while (j < s.length && s[j] != '.' && s[j] != '[') j++
337+
sb.append('[').append(s, start, j).append(']')
338+
i = j
339+
}
314340
}
315341
} else {
316342
sb.append('.')
317343
i++
318344
}
319345
}
346+
'%' -> {
347+
// Preserve percent sequences verbatim at top level. Encoded dots (%2E/%2e)
348+
// are *not* used as separators here; they may be mapped to '.' later
349+
// when parsing segments (see DecodeOptions.defaultDecode/parseObject).
350+
sb.append('%')
351+
i++
352+
}
320353
else -> {
321354
sb.append(ch)
322355
i++
@@ -327,14 +360,20 @@ internal object Decoder {
327360
}
328361

329362
/**
330-
* Splits a key into segments based on brackets and dots, handling depth and strictness.
363+
* Split a key into segments based on balanced brackets.
364+
*
365+
* Notes:
366+
* - Top-level dot splitting (`a.b` → `a\[b]`) happens earlier via [dotToBracketTopLevel] when
367+
* [allowDots] is true.
368+
* - Unterminated '[': the entire key is treated as a single literal segment (qs semantics).
369+
* - If [strictDepth] is false and depth is exceeded, the remainder is kept as one final bracket
370+
* segment.
331371
*
332372
* @param originalKey The original key to split.
333-
* @param allowDots Whether to allow dots in the key.
334-
* @param maxDepth The maximum depth for splitting.
335-
* @param strictDepth Whether to enforce strict depth limits.
336-
* @return A list of segments derived from the original key.
337-
* @throws IndexOutOfBoundsException if the depth exceeds maxDepth and strictDepth is true.
373+
* @param allowDots Whether to allow top-level dot splitting (already applied upstream).
374+
* @param maxDepth The maximum number of bracket segments to collect.
375+
* @param strictDepth When true, exceeding [maxDepth] throws; when false, the remainder is a
376+
* single trailing segment.
338377
*/
339378
internal fun splitKeyIntoSegments(
340379
originalKey: String,
@@ -360,9 +399,31 @@ internal object Decoder {
360399
var open = first
361400
var depth = 0
362401
while (open >= 0 && depth < maxDepth) {
363-
val close = key.indexOf(']', open + 1)
364-
if (close < 0) break
365-
segments.add(key.substring(open, close + 1)) // e.g. "[p]" or "[]"
402+
var i2 = open + 1
403+
var level = 1
404+
var close = -1
405+
406+
// Balance nested '[' and ']' within the same group,
407+
// so "[with[inner]]" is treated as one segment.
408+
while (i2 < key.length) {
409+
val ch2 = key[i2]
410+
if (ch2 == '[') {
411+
level++
412+
} else if (ch2 == ']') {
413+
level--
414+
if (level == 0) {
415+
close = i2
416+
break
417+
}
418+
}
419+
i2++
420+
}
421+
422+
if (close < 0) {
423+
break // unterminated group; stop collecting
424+
}
425+
426+
segments.add(key.substring(open, close + 1)) // includes the surrounding [ ]
366427
depth++
367428
open = key.indexOf('[', close + 1)
368429
}

qs-kotlin/src/main/kotlin/io/github/techouse/qskotlin/models/DecodeOptions.kt

Lines changed: 15 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -145,8 +145,10 @@ data class DecodeOptions(
145145
/**
146146
* Effective `decodeDotInKeys` value.
147147
*
148-
* Defaults to `false` when unspecified. When `true`, encoded dots (`%2E`/`%2e`) inside key
149-
* segments are mapped to `.` **after** splitting, without introducing extra dot‑splits.
148+
* Defaults to `false` when unspecified. Inside bracket segments, percent-decoding will
149+
* naturally yield '.' from `%2E/%2e`. `decodeDotInKeys` controls whether encoded dots at the
150+
* top level are treated as additional split points; it does not affect the literal '.' produced
151+
* by percent-decoding inside bracket segments.
150152
*/
151153
val getDecodeDotInKeys: Boolean
152154
get() = decodeDotInKeys ?: false
@@ -168,7 +170,8 @@ data class DecodeOptions(
168170
* Uses the provided [decoder] when set; otherwise falls back to [Utils.decode]. For backward
169171
* compatibility, a [legacyDecoder] `(value, charset)` can be supplied and is adapted
170172
* internally. The [kind] will be [DecodeKind.KEY] for keys (and key segments) and
171-
* [DecodeKind.VALUE] for values.
173+
* [DecodeKind.VALUE] for values, and is forwarded to custom decoders. The library default does
174+
* not vary decoding based on [kind].
172175
*/
173176
internal fun decode(
174177
value: String?,
@@ -180,102 +183,29 @@ data class DecodeOptions(
180183
return if (d != null) {
181184
d.decode(value, charset, kind) // honor nulls from user decoder
182185
} else {
183-
defaultDecode(value, charset, kind)
186+
defaultDecode(value, charset)
184187
}
185188
}
186189

187190
/**
188191
* Default library decode.
189192
*
190-
* For [DecodeKind.KEY], protects encoded dots (`%2E`/`%2e`) **before** percent‑decoding so key
191-
* splitting and post‑split mapping run on the intended tokens.
193+
* Keys are decoded identically to values via [Utils.decode], which percent‑decodes `%2E/%2e` to
194+
* '.'. Whether a '.' participates in key splitting is decided by the parser (based on options).
192195
*/
193-
private fun defaultDecode(value: String?, charset: Charset?, kind: DecodeKind): Any? {
196+
private fun defaultDecode(value: String?, charset: Charset?): Any? {
194197
if (value == null) return null
195-
if (kind == DecodeKind.KEY) {
196-
val protected =
197-
protectEncodedDotsForKeys(value, includeOutsideBrackets = (allowDots == true))
198-
return Utils.decode(protected, charset)
199-
}
198+
// Keys decode exactly like values; do NOT “protect” encoded dots.
200199
return Utils.decode(value, charset)
201200
}
202201

203-
/**
204-
* Double‑encode %2E/%2e in KEY strings so the percent‑decoder does not turn them into '.' too
205-
* early.
206-
*
207-
* When [includeOutsideBrackets] is true, occurrences both inside and outside bracket segments
208-
* are protected. Otherwise, only those **inside** `[...]` are protected. Note: only literal
209-
* `[`/`]` affect depth; percent‑encoded brackets (`%5B`/`%5D`) are treated as content, not
210-
* structure.
211-
*/
212-
private fun protectEncodedDotsForKeys(input: String, includeOutsideBrackets: Boolean): String {
213-
val pct = input.indexOf('%')
214-
if (pct < 0) return input
215-
if (input.indexOf("2E", pct) < 0 && input.indexOf("2e", pct) < 0) return input
216-
val n = input.length
217-
val sb = StringBuilder(n + 8)
218-
var depth = 0
219-
var i = 0
220-
while (i < n) {
221-
when (val ch = input[i]) {
222-
'[' -> {
223-
depth++
224-
sb.append(ch)
225-
i++
226-
}
227-
']' -> {
228-
if (depth > 0) depth--
229-
sb.append(ch)
230-
i++
231-
}
232-
'%' -> {
233-
if (
234-
i + 2 < n &&
235-
input[i + 1] == '2' &&
236-
(input[i + 2] == 'E' || input[i + 2] == 'e')
237-
) {
238-
val inside = depth > 0
239-
if (inside || includeOutsideBrackets) {
240-
sb.append("%25").append(if (input[i + 2] == 'E') "2E" else "2e")
241-
} else {
242-
sb.append('%').append('2').append(input[i + 2])
243-
}
244-
i += 3
245-
} else {
246-
sb.append(ch)
247-
i++
248-
}
249-
}
250-
else -> {
251-
sb.append(ch)
252-
i++
253-
}
254-
}
255-
}
256-
return sb.toString()
257-
}
258-
259-
/**
260-
* Back‑compat helper: decode a value without key/value kind context.
261-
*
262-
* Prefer calling [decode] directly (or [decodeKey]/[decodeValue] for explicit context).
263-
*/
264-
@Deprecated(
265-
message =
266-
"Deprecated: use decodeKey/decodeValue (or decode(value, charset, kind)) to honor key/value context. This will be removed in the next major.",
267-
replaceWith = ReplaceWith("decode(value, charset)"),
268-
level = DeprecationLevel.WARNING,
269-
)
270-
@Suppress("unused")
271-
@JvmOverloads
272-
fun getDecoder(value: String?, charset: Charset? = null): Any? = decode(value, charset)
273-
274202
/** Convenience: decode a key to String? */
275-
internal fun decodeKey(value: String?, charset: Charset?): String? =
203+
@JvmOverloads
204+
fun decodeKey(value: String?, charset: Charset? = this.charset): String? =
276205
decode(value, charset, DecodeKind.KEY)?.toString() // keys are always coerced to String
277206

278207
/** Convenience: decode a value */
279-
internal fun decodeValue(value: String?, charset: Charset?): Any? =
208+
@JvmOverloads
209+
fun decodeValue(value: String?, charset: Charset? = this.charset): Any? =
280210
decode(value, charset, DecodeKind.VALUE)
281211
}

0 commit comments

Comments
 (0)