Skip to content

Commit 32e8c1f

Browse files
authored
feature: support unicode length validation (#15)
1 parent 8d1ac02 commit 32e8c1f

File tree

4 files changed

+26
-24
lines changed

4 files changed

+26
-24
lines changed

lib/jsonschema.lua

Lines changed: 26 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -395,6 +395,22 @@ local function to_lua_code(var)
395395
return code .. "}"
396396
end
397397

398+
--- Build the Lua source text of a function that counts UTF-8 characters.
-- The returned string is an anonymous `function(s) ... end` expression meant
-- to be spliced into generated validator code. The generated function walks
-- `s` byte by byte, uses the lead byte of each sequence to decide how many
-- bytes to skip (1 to 4), and counts one character per sequence.
--
-- Bytes that cannot start a UTF-8 sequence (128-191 stray continuation bytes
-- and 248-255) are counted as one character each and skipped one byte at a
-- time. Without that fallback the index would never advance and the
-- generated loop would not terminate on malformed input.
--
-- @param ctx code generation context; only `ctx:libfunc("string.byte")` is
--   called, and its result is inserted verbatim as the expression used to
--   read one byte (presumably the name of a local alias -- confirm against
--   the context implementation).
-- @return string Lua source of the character-counting function.
local function utf8_len_func(ctx)
  return sformat([[function(s)
  local c, j=0, 1
  while j <= #s do
    local cb = %s(s, j)
    if cb >= 0 and cb <= 127 then j = j + 1
    elseif cb >= 192 and cb <= 223 then j = j + 2
    elseif cb >= 224 and cb <= 239 then j = j + 3
    elseif cb >= 240 and cb <= 247 then j = j + 4
    else j = j + 1
    end
    c = c + 1
  end
  return c
end]], ctx:libfunc("string.byte"))
end
413+
398414
generate_validator = function(ctx, schema)
399415
-- get type information, as it will be necessary anyway
400416
local datatype = ctx:localvar(sformat('%s(%s)',
@@ -747,15 +763,19 @@ generate_validator = function(ctx, schema)
747763
if schema.minLength or schema.maxLength or schema.pattern then
748764
ctx:stmt(sformat('if %s == "string" then', datatype))
749765
if schema.minLength then
750-
ctx:stmt(sformat(' if #%s < %d then', ctx:param(1), schema.minLength))
751-
ctx:stmt(sformat(' return false, %s("string too short, expected at least %d, got %%d", #%s)',
752-
ctx:libfunc('string.format'), schema.minLength, ctx:param(1)))
766+
ctx:stmt(sformat(' local utf8_len_func = %s', utf8_len_func(ctx)))
767+
ctx:stmt(sformat(' local c = utf8_len_func(%s)',ctx:param(1)))
768+
ctx:stmt(sformat(' if c < %d then', schema.minLength))
769+
ctx:stmt(sformat(' return false, %s("string too short, expected at least %d, got ") ..c',
770+
ctx:libfunc('string.format'), schema.minLength))
753771
ctx:stmt( ' end')
754772
end
755773
if schema.maxLength then
756-
ctx:stmt(sformat(' if #%s > %d then', ctx:param(1), schema.maxLength))
757-
ctx:stmt(sformat(' return false, %s("string too long, expected at most %d, got %%d", #%s)',
758-
ctx:libfunc('string.format'), schema.maxLength, ctx:param(1)))
774+
ctx:stmt(sformat(' local utf8_len_func = %s', utf8_len_func(ctx)))
775+
ctx:stmt(sformat(' local c = utf8_len_func(%s)',ctx:param(1)))
776+
ctx:stmt(sformat(' if c > %d then', schema.maxLength))
777+
ctx:stmt(sformat(' return false, %s("string too long, expected at most %d, got ") .. c',
778+
ctx:libfunc('string.format'), schema.maxLength))
759779
ctx:stmt( ' end')
760780
end
761781
if schema.pattern then

t/draft4.lua

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -25,12 +25,6 @@ local blacklist = {
2525
['regexes are not anchored by default and are case sensitive'] = {
2626
['recognized members are accounted for'] = true, -- uses an unsupported pattern construct
2727
},
28-
['minLength validation'] = {
29-
['one supplementary Unicode code point is not long enough'] = true, -- unicode handling
30-
},
31-
['maxLength validation'] = {
32-
['two supplementary Unicode code points is long enough'] = true, -- unicode handling
33-
},
3428
['required validation'] = {
3529
['ignores arrays'] = true
3630
},

t/draft6.lua

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -25,12 +25,6 @@ local blacklist = {
2525
['regexes are not anchored by default and are case sensitive'] = {
2626
['recognized members are accounted for'] = true, -- uses an unsupported pattern construct
2727
},
28-
['minLength validation'] = {
29-
['one supplementary Unicode code point is not long enough'] = true, -- unicode handling
30-
},
31-
['maxLength validation'] = {
32-
['two supplementary Unicode code points is long enough'] = true, -- unicode handling
33-
},
3428
['required validation'] = {
3529
['ignores arrays'] = true
3630
},

t/draft7.lua

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -25,12 +25,6 @@ local blacklist = {
2525
['regexes are not anchored by default and are case sensitive'] = {
2626
['recognized members are accounted for'] = true, -- uses an unsupported pattern construct
2727
},
28-
['minLength validation'] = {
29-
['one supplementary Unicode code point is not long enough'] = true, -- unicode handling
30-
},
31-
['maxLength validation'] = {
32-
['two supplementary Unicode code points is long enough'] = true, -- unicode handling
33-
},
3428
['required validation'] = {
3529
['ignores arrays'] = true
3630
},

0 commit comments

Comments
 (0)