
Commit 1d5d2fe

committed
Updated string serialization, including for newToken("string").
Improved some error messages. Added tests for newToken().
1 parent 5d8600f commit 1d5d2fe

File tree

2 files changed: +134 −63 lines


preprocess.lua

Lines changed: 60 additions & 63 deletions
@@ -954,28 +954,25 @@ function serialize(buffer, v)
     elseif vType == "string" then
         if v == "" then
             tableInsert(buffer, '""')
+            return true
+        end
+
+        local useApostrophe = v:find('"', 1, true) and not v:find("'", 1, true)
+        local quote = useApostrophe and "'" or '"'
+
+        tableInsert(buffer, quote)
 
-        elseif fastStrings or not v:find"[^\32-\126\t\n]" then
+        if fastStrings or not v:find"[^\32-\126\t\n]" then
            -- print(">> FAST", #v) -- DEBUG
 
-            local s = v:gsub("[%c\128-\255\"\\]", function(c)
-                local s = ESCAPE_SEQUENCES[c] or F("\\%03d", c:byte())
-                ESCAPE_SEQUENCES[c] = s -- Cache the result.
-                return s
+            local s = v:gsub((useApostrophe and "[\t\n\\']" or '[\t\n\\"]'), function(c)
+                return ESCAPE_SEQUENCES[c] or errorf("Internal error. (%d)", c:byte())
             end)
-
-            tableInsert(buffer, '"')
             tableInsert(buffer, s)
-            tableInsert(buffer, '"')
 
         else
             -- print(">> SLOW", #v) -- DEBUG
-
-            local quote = (v:find('"', 1, true) and not v:find("'", 1, true)) and "'" or '"'
-            local pos = 1
-            local toMinimize = {}
-
-            tableInsert(buffer, quote)
+            local pos = 1
 
             -- @Speed: There are optimizations to be made here!
             while pos <= #v do
@@ -992,23 +989,14 @@ function serialize(buffer, v)
 
                 -- Anything else.
                 else
-                    local b = v:byte(pos)
-                    tableInsert(buffer, F("\\%03d", b))
-                    if b <= 99 then tableInsert(toMinimize, #buffer) end
-                    pos = pos+1
-                end
-            end
-
-            -- Minimize \nnn sequences that aren't followed by digits.
-            for _, i in ipairs(toMinimize) do
-                if not (buffer[i+1] and buffer[i+1]:find"^%d") then
-                    buffer[i] = buffer[i]:gsub("0+(%d)", "%1")
+                    tableInsert(buffer, F((v:find("^%d", pos+1) and "\\%03d" or "\\%d"), v:byte(pos)))
+                    pos = pos + 1
                 end
             end
-
-            tableInsert(buffer, quote)
         end
 
+        tableInsert(buffer, quote)
+
     elseif v == 1/0 then
         tableInsert(buffer, "(1/0)")
     elseif v == -1/0 then
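Note: a rough sketch of the resulting serialization (non-fastStrings path), assuming toLua(), seen below in the newToken("string") change, is the exported wrapper around serialize() and that printable characters pass through unchanged in the branches not shown; outputs follow the new code and the added tests:

    toLua('foo"\nbar')  --> 'foo"\nbar'   -- contains '"' but no "'": apostrophe quotes, newline escaped
    toLua("")           --> ""            -- empty string takes the new early return
    toLua("\0z")        --> "\0z"         -- decimal escapes are now minimized inline...
    toLua("\0".."0")    --> "\0000"       -- ...but keep three digits when a digit follows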
@@ -1548,6 +1536,22 @@ function metaFuncs.getNextUsefulToken(tokens, i1, steps)
     return nil
 end
 
+local numberFormatters = {
+    -- @Incomplete: Hexadecimal floats.
+    auto = function(n) return tostring(n) end,
+    integer = function(n) return F("%d", n) end,
+    int = function(n) return F("%d", n) end,
+    float = function(n) return F("%f", n):gsub("(%d)0+$", "%1") end,
+    scientific = function(n) return F("%e", n):gsub("(%d)0+e", "%1e"):gsub("0+(%d+)$", "%1") end,
+    SCIENTIFIC = function(n) return F("%E", n):gsub("(%d)0+E", "%1E"):gsub("0+(%d+)$", "%1") end,
+    e = function(n) return F("%e", n):gsub("(%d)0+e", "%1e"):gsub("0+(%d+)$", "%1") end,
+    E = function(n) return F("%E", n):gsub("(%d)0+E", "%1E"):gsub("0+(%d+)$", "%1") end,
+    hexadecimal = function(n) return (n == math.floor(n) and F("0x%x", n) or error("Hexadecimal floats not supported yet.", 3)) end,
+    HEXADECIMAL = function(n) return (n == math.floor(n) and F("0x%X", n) or error("Hexadecimal floats not supported yet.", 3)) end,
+    hex = function(n) return (n == math.floor(n) and F("0x%x", n) or error("Hexadecimal floats not supported yet.", 3)) end,
+    HEX = function(n) return (n == math.floor(n) and F("0x%X", n) or error("Hexadecimal floats not supported yet.", 3)) end,
+}
+
 -- newToken()
 -- Create a new token. Different token types take different arguments.
 -- token = newToken( tokenType, ... )
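For reference, a quick sketch of what the formats produce through newToken("number", n, format); the values match the tests added in tests/suite.lua below:

    newToken("number", 42,   "auto")  -- representation "42"
    newToken("number", 5.75, "e")     -- representation "5.75e+0"
    newToken("number", 255,  "HEX")   -- representation "0xFF"
    newToken("number", 0/0,  "hex")   -- representation "(0/0)"  (nan/inf are handled before the formatters)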
@@ -1560,7 +1564,7 @@ end
 -- stringToken = newToken( "string", contents [, longForm=false ] )
 -- whitespaceToken = newToken( "whitespace", contents )
 -- ppEntryToken = newToken( "pp_entry", isDouble )
--- ppKeywordToken = newToken( "pp_keyword", ppKeyword )
+-- ppKeywordToken = newToken( "pp_keyword", ppKeyword ) -- ppKeyword can be "@".
 --
 -- commentToken = { type="comment", representation=string, value=string, long=isLongForm }
 -- identifierToken = { type="identifier", representation=string, value=string }
@@ -1590,7 +1594,7 @@ function metaFuncs.newToken(tokType, ...)
     if tokType == "comment" then
         local comment, long = ...
         long = not not (long or comment:find"[\r\n]")
-        assert(type(comment) == "string")
+        assertarg(2, comment, "string")
 
         local repr
         if long then
@@ -1610,7 +1614,7 @@ function metaFuncs.newToken(tokType, ...)
 
     elseif tokType == "identifier" then
         local ident = ...
-        assert(type(ident) == "string")
+        assertarg(2, ident, "string")
 
         if ident == "" then
             error("Identifier length is 0.", 2)
@@ -1622,7 +1626,7 @@ function metaFuncs.newToken(tokType, ...)
 
     elseif tokType == "keyword" then
         local keyword = ...
-        assert(type(keyword) == "string")
+        assertarg(2, keyword, "string")
 
         if not KEYWORDS[keyword] then
             errorf(2, "Bad keyword '%s'.", keyword)
@@ -1633,33 +1637,23 @@ function metaFuncs.newToken(tokType, ...)
     elseif tokType == "number" then
         local n, numberFormat = ...
         numberFormat = numberFormat or "auto"
-        assert(type(n) == "number")
-
-        -- Some of these are technically multiple other tokens. We could trigger an error but ehhh...
-        -- @Incomplete: Hexadecimal floats.
-        local numStr
-            = n ~= n and "(0/0)"
-            or n == 1/0 and "(1/0)"
-            or n == -1/0 and "(-1/0)"
-            or numberFormat == "auto" and tostring(n)
-            or numberFormat == "integer" and F("%d", n)
-            or numberFormat == "int" and F("%d", n)
-            or numberFormat == "float" and F("%f", n):gsub("(%d)0+$", "%1")
-            or numberFormat == "scientific" and F("%e", n):gsub("(%d)0+e", "%1e"):gsub("0+(%d+)$", "%1")
-            or numberFormat == "SCIENTIFIC" and F("%E", n):gsub("(%d)0+E", "%1E"):gsub("0+(%d+)$", "%1")
-            or numberFormat == "e" and F("%e", n):gsub("(%d)0+e", "%1e"):gsub("0+(%d+)$", "%1")
-            or numberFormat == "E" and F("%E", n):gsub("(%d)0+E", "%1E"):gsub("0+(%d+)$", "%1")
-            or numberFormat == "hexadecimal" and (n == math.floor(n) and F("0x%x", n) or error("Hexadecimal floats not supported yet.", 2))
-            or numberFormat == "HEXADECIMAL" and (n == math.floor(n) and F("0x%X", n) or error("Hexadecimal floats not supported yet.", 2))
-            or numberFormat == "hex" and (n == math.floor(n) and F("0x%x", n) or error("Hexadecimal floats not supported yet.", 2))
-            or numberFormat == "HEX" and (n == math.floor(n) and F("0x%X", n) or error("Hexadecimal floats not supported yet.", 2))
-            or errorf(2, "Invalid number format '%s'.", numberFormat)
+        assertarg(2, n, "number")
+        assertarg(3, numberFormat, "string")
+
+        -- Some of these are technically multiple other tokens. We could raise an error but ehhh...
+        local numStr = (
+            n ~= n and "(0/0)" or
+            n == 1/0 and "(1/0)" or
+            n == -1/0 and "(-1/0)" or
+            numberFormatters[numberFormat] and numberFormatters[numberFormat](n) or
+            errorf(2, "Invalid number format '%s'.", numberFormat)
+        )
 
         return {type="number", representation=numStr, value=n}
 
     elseif tokType == "punctuation" then
         local symbol = ...
-        assert(type(symbol) == "string")
+        assertarg(2, symbol, "string")
 
         -- Note: "!" and "!!" are of a different token type (pp_entry).
         if not PUNCTUATION[symbol] then
@@ -1671,27 +1665,28 @@ function metaFuncs.newToken(tokType, ...)
     elseif tokType == "string" then
         local s, long = ...
         long = not not long
-        assert(type(s) == "string")
+        assertarg(2, s, "string")
 
         local repr
+
         if long then
             local equalSigns = ""
 
             while s:find(F("]%s]", equalSigns), 1, true) do
-                equalSigns = equalSigns.."="
+                equalSigns = equalSigns .. "="
             end
 
             repr = F("[%s[%s]%s]", equalSigns, s, equalSigns)
 
         else
-            repr = F("%q", s):gsub("\\\n", "\\n")
+            repr = toLua(s)
         end
 
         return {type="string", representation=repr, value=s, long=long}
 
     elseif tokType == "whitespace" then
         local whitespace = ...
-        assert(type(whitespace) == "string")
+        assertarg(2, whitespace, "string")
 
         if whitespace == "" then
             error("String is empty.", 2)
@@ -1703,24 +1698,26 @@ function metaFuncs.newToken(tokType, ...)
 
     elseif tokType == "pp_entry" then
         local double = ...
-        assert(type(double) == "boolean")
+        assertarg(2, double, "boolean")
 
         local symbol = double and "!!" or "!"
 
         return {type="pp_entry", representation=symbol, value=symbol, double=double}
 
     elseif tokType == "pp_keyword" then
         local keyword = ...
-        assert(type(keyword) == "string")
+        assertarg(2, keyword, "string")
 
-        if not PREPROCESSOR_KEYWORDS[keyword] then
+        if keyword == "@" then
+            return {type="pp_keyword", representation="@@", value="insert"}
+        elseif not PREPROCESSOR_KEYWORDS[keyword] then
             errorf(2, "Bad preprocessor keyword '%s'.", keyword)
+        else
+            return {type="pp_keyword", representation="@"..keyword, value=keyword}
         end
 
-        return {type="pp_keyword", representation="@"..keyword, value=keyword}
-
     else
-        errorf(2, "Invalid token type '%s'.", tokType)
+        errorf(2, "Invalid token type '%s'.", tostring(tokType))
     end
 end
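Usage sketch for the new "@" shorthand; this mirrors the added tests, where pp is the table returned by ppChunk() in the test suite:

    pp.newToken("pp_keyword", "line")  -- value "line",   representation "@line"
    pp.newToken("pp_keyword", "@")     -- value "insert", representation "@@"
    pp.newToken("pp_keyword", "bad")   -- errors: Bad preprocessor keyword 'bad'.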

tests/suite.lua

Lines changed: 74 additions & 0 deletions
@@ -333,6 +333,80 @@ end)
 
 addLabel("Library API")
 
+doTest("Create tokens", function()
+    local pp = ppChunk()
+
+    local function assertToken(tok, tokType, v, repr, extraK, extraV)
+        if tok.type ~= tokType then error(tok.type, 2) end
+        if tok.value ~= v then error(tok.value, 2) end
+        if tok.representation ~= repr then error(tok.representation, 2) end
+        if tok[extraK] ~= extraV then error(tostring(tok[extraK]), 2) end
+    end
+
+    assert(not pcall(pp.newToken, "bad", nil))
+
+    -- Comment.
+    assertToken(pp.newToken("comment", "foo", false), "comment", "foo", "--foo\n", "long", false)
+    assertToken(pp.newToken("comment", "foo", true ), "comment", "foo", "--[[foo]]", "long", true )
+    assertToken(pp.newToken("comment", "foo\nbar", false), "comment", "foo\nbar", "--[[foo\nbar]]", "long", true )
+    assertToken(pp.newToken("comment", "foo\nbar]]", false), "comment", "foo\nbar]]", "--[=[foo\nbar]]]=]", "long", true )
+
+    -- Identifier.
+    assertToken(pp.newToken("identifier", "foo"), "identifier", "foo", "foo", nil, nil)
+
+    -- Keyword.
+    assertToken(pp.newToken("keyword", "if"), "keyword", "if", "if", nil, nil)
+
+    assert(not pcall(pp.newToken, "keyword", "bad"))
+
+    -- Number.
+    assertToken(pp.newToken("number", 42, "auto" ), "number", 42, "42", nil, nil)
+    assertToken(pp.newToken("number", -1.25, "auto" ), "number", -1.25, "-1.25", nil, nil)
+    assertToken(pp.newToken("number", 5.75, "e" ), "number", 5.75, "5.75e+0", nil, nil)
+    assertToken(pp.newToken("number", 255, "HEX" ), "number", 255, "0xFF", nil, nil)
+    assertToken(pp.newToken("number", 1/0, "auto" ), "number", 1/0, "(1/0)", nil, nil)
+    assertToken(pp.newToken("number", -1/0, "float"), "number", -1/0, "(-1/0)", nil, nil)
+
+    local tok = pp.newToken("number", 0/0, "hex")
+    assert(tok.type == "number", tok.type)
+    assert(tok.value ~= tok.value, tok.value)
+    assert(tok.representation == "(0/0)", tok.representation)
+
+    -- Punctuation.
+    assertToken(pp.newToken("punctuation", "=="), "punctuation", "==", "==", nil, nil)
+
+    assert(not pcall(pp.newToken, "punctuation", "!="))
+
+    -- String.
+    assertToken(pp.newToken("string", "foo", false), "string", "foo", '"foo"', "long", false)
+    assertToken(pp.newToken("string", 'foo"\nbar', false), "string", 'foo"\nbar', "'foo\"\\nbar'", "long", false)
+    assertToken(pp.newToken("string", "foo", true ), "string", "foo", "[[foo]]", "long", true )
+    assertToken(pp.newToken("string", "foo]]", true ), "string", "foo]]", "[=[foo]]]=]", "long", true )
+
+    assertToken(
+        pp.newToken("string", "\0\1\2\3\4\5\6\7\8\9\10\11\12\13\14\15\16\17\18\19\20\21\22\23\24\25\26\27\28\29\30\0310\127", false),
+        "string",
+        "\0\1\2\3\4\5\6\7\8\9\10\11\12\13\14\15\16\17\18\19\20\21\22\23\24\25\26\27\28\29\30\0310\127",
+        [["\0\1\2\3\4\5\6\a\b\t\n\v\f\r\14\15\16\17\18\19\20\21\22\23\24\25\26\27\28\29\30\0310\127"]],
+        "long", false
+    )
+
+    -- Whitespace.
+    assertToken(pp.newToken("whitespace", "\t \n"), "whitespace", "\t \n", "\t \n", nil, nil)
+
+    assert(not pcall(pp.newToken, "whitespace", "bad"))
+
+    -- Preprocessor entry.
+    assertToken(pp.newToken("pp_entry", false), "pp_entry", "!", "!", "double", false)
+    assertToken(pp.newToken("pp_entry", true ), "pp_entry", "!!", "!!", "double", true )
+
+    -- Preprocessor keyword.
+    assertToken(pp.newToken("pp_keyword", "line"), "pp_keyword", "line", "@line", nil, nil)
+    assertToken(pp.newToken("pp_keyword", "@" ), "pp_keyword", "insert", "@@", nil, nil)
+
+    assert(not pcall(pp.newToken, "pp_keyword", "bad"))
+end)
+
 doTest("Get useful tokens", function()
     local pp = ppChunk()
     local tokens = pp.tokenize[[local x = 5 -- Foo!]]

0 commit comments
