Skip to content

Commit 177d1a6

Browse files
committed
Added functions percent() and urlRaw().
url() and related code now fix root-relative paths (those starting with a single "/") when baseUrl points to a subdirectory. Updated URL-related functions. Fixed two error messages whose format strings used '%' where '%s' was intended, so the value was never inserted.
1 parent 1b3585a commit 177d1a6

File tree

3 files changed

+110
-58
lines changed

3 files changed

+110
-58
lines changed

src/app.lua2p

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -466,7 +466,7 @@ local function setup()
466466
findAll = itemWithAll,
467467
floor = math.floor,
468468
formatTemplate = formatTemplate,
469-
generatorMeta = generatorMeta,
469+
generatorMeta = getGeneratorHtmlMetaTag,
470470
getFilename = getFilename,
471471
getKeys = getKeys,
472472
gmatchAndBetween = gmatchAndBetween,
@@ -478,7 +478,8 @@ local function setup()
478478
max = math.max,
479479
min = math.min,
480480
newStringBuilder = newStringBuilder, newBuffer = newStringBuilder, -- newBuffer() is @Deprecated, I guess? 2021-05-15
481-
prettyUrl = toPrettyUrl,
481+
percent = percentEncode,
482+
prettyUrl = urlToPrettyText,
482483
printf = printf,
483484
printfOnce = printfOnce,
484485
printObject = printObject,
@@ -491,9 +492,10 @@ local function setup()
491492
toTime = datetimeToTime,
492493
trim = trim,
493494
trimNewlines = trimNewlines,
494-
url = toUrl,
495-
urlAbs = toUrlAbsolute,
495+
url = fixRelativeUrlAndEncode,
496+
urlAbs = toAbsoluteAndEncodedUrl,
496497
urlExists = urlExists,
498+
urlRaw = partialEncodeUrl,
497499
urlize = urlize,
498500

499501
chooseExistingFile = function(sitePathWithoutExt, exts)
@@ -604,10 +606,10 @@ local function setup()
604606

605607
local pathImageRel = sitePathToPath(sitePathImageRel, 2)
606608
local thumbInfo = createThumbnail(pathImageRel, thumbW, thumbH, 2)
607-
local thumbUrl = toUrl("/"..thumbInfo.path)
609+
local thumbUrl = fixRelativeUrlAndEncode("/"..thumbInfo.path)
608610

609611
local b = newStringBuilder()
610-
if isLink then b('<a href="%s" target="_blank">', toUrl("/"..pathImageRel)) end
612+
if isLink then b('<a href="%s" target="_blank">', fixRelativeUrlAndEncode("/"..pathImageRel)) end
611613
b('<img src="%s" width="%d" height="%d" alt="">', encodeHtmlEntities(thumbUrl), thumbInfo.width, thumbInfo.height)
612614
if isLink then b('</a>') end
613615

@@ -647,8 +649,8 @@ local function setup()
647649
a = function(url, label)
648650
return F(
649651
'<a href="%s">%s</a>',
650-
encodeHtmlEntities(toUrl(url)),
651-
encodeHtmlEntities(label or toPrettyUrl(url))
652+
encodeHtmlEntities(fixRelativeUrlAndEncode(url)),
653+
encodeHtmlEntities(label or urlToPrettyText(url))
652654
)
653655
end,
654656

@@ -660,14 +662,14 @@ local function setup()
660662
if title then
661663
return F(
662664
'<img src="%s" alt="%s" title="%s">',
663-
encodeHtmlEntities(toUrl(url)),
665+
encodeHtmlEntities(fixRelativeUrlAndEncode(url)),
664666
encodeHtmlEntities(alt or ""),
665667
encodeHtmlEntities(title == true and alt or title)
666668
)
667669
else
668670
return F(
669671
'<img src="%s" alt="%s">',
670-
encodeHtmlEntities(toUrl(url)),
672+
encodeHtmlEntities(fixRelativeUrlAndEncode(url)),
671673
encodeHtmlEntities(alt or "")
672674
)
673675
end
@@ -1269,8 +1271,7 @@ local function setup()
12691271
scriptEnvironmentGlobals.fori__ = ipairs
12701272
scriptEnvironmentGlobals.foriReverse__ = ipairsr
12711273
scriptEnvironmentGlobals.lock__ = scriptEnvironmentGlobals.lock
1272-
scriptEnvironmentGlobals.type__ = type
1273-
scriptEnvironmentGlobals.url__ = toUrl
1274+
scriptEnvironmentGlobals.url__ = fixRelativeUrlAndEncode
12741275

12751276

12761277

src/functions.lua2p

Lines changed: 83 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,10 @@
2121
F, formatBytes, formatTemplate
2222
generateFromTemplateString, generateFromTemplateFile, generateRedirection
2323
generateNonPages
24-
generatorMeta
2524
getCwd
2625
getDirectory, getFilename, getExtension, getBasename
2726
getFileContentsBinary, getFileContentsText
27+
getGeneratorHtmlMetaTag
2828
getKeys
2929
getLayoutTemplate
3030
getLineNumber
@@ -51,6 +51,7 @@
5151
pairsSorted
5252
parseAndRunTemplate
5353
pathToSitePath, sitePathToPath
54+
percentEncode, partialEncodeUrl, fixRelativeUrlAndEncode, toAbsoluteAndEncodedUrl, urlize, urlToPrettyText
5455
printNoLog, printfNoLog, log, print, printOnce, printf, printfOnce, timestampPrint, timestampPrintOnce, timestampPrintVerbose, timestampPrintError, timestampPrintWarning, timestampPrintWarningOnce, printObject, logErrorTraceback
5556
pushContext, assertContext, getContext, isContext
5657
removeItem
@@ -62,7 +63,6 @@
6263
splitString
6364
toNormalPath, toWindowsPath
6465
tostringForTemplates
65-
toUrl, toUrlAbsolute, urlize, toPrettyUrl
6666
traverseDirectory, traverseFiles
6767
trim, trimNewlines
6868
unindent
@@ -1305,52 +1305,102 @@ end
13051305

13061306

13071307
do
1308-
local URI_PERCENT_CODES_TO_NOT_ENCODE = {
1309-
["%2d"]="-",["%2e"]=".",["%7e"]="~",--["???"]="_",
1310-
["%21"]="!",["%23"]="#",["%24"]="$",["%26"]="&",["%27"]="'",["%28"]="(",["%29"]=")",["%2a"]="*",["%2b"]="+",
1311-
["%2c"]=",",["%2f"]="/",["%3a"]=":",["%3b"]=";",["%3d"]="=",["%3f"]="?",["%40"]="@",["%5b"]="[",["%5d"]="]",
1308+
_G.percentEncode = urlLib.escape
1309+
1310+
local URI_PERCENT_CODES_TO_NOT_ENCODE = { -- AKA reserved characters (except non-ASCII characters).
1311+
-- Note: These characters are explicitly unreserved and are thus never encoded
1312+
-- in the first place: A-Z a-z 0-9 - _ . ~
1313+
1314+
-- Note: This is similar to encodeURI() in JavaScript, except we also don't
1315+
-- encode '[' and ']' (because of RFC 3986 which added those characters as
1316+
-- reserved for IPv6 support).
1317+
1318+
["%21"]="!",["%23"]="#",["%24"]="$",["%26"]="&",["%27"]="'",["%28"]="(",["%29"]=")",["%2a"]="*",
1319+
["%2b"]="+",["%2c"]=",",["%2f"]="/",["%3a"]=":",["%3b"]=";",["%3d"]="=",["%3f"]="?",["%40"]="@",
1320+
1321+
-- IPv6.
1322+
["%5b"]="[",["%5d"]="]",
1323+
1324+
-- Prevent double-encoding, because lots of URLs in text form everywhere
1325+
-- already have %xx sequences. (We assume URLs don't have literal percent
1326+
-- signs, which might be a problem in rare cases. However, we never return an
1327+
-- incorrectly encoded URL. :EnsureValidPercentEncoding)
1328+
["%25"]="%",
1329+
1330+
-- Info:
1331+
-- JavaScript's encodeURI() does not encode: A-Z a-z 0-9 - _ . ! ~ * ' ( ) ; , / ? : @ & = + $ #
1332+
-- JavaScript's encodeURIComponent() does not encode: A-Z a-z 0-9 - _ . ! ~ * ' ( )
13121333
}
13131334

1314-
function _G.toUrl(url)
1315-
if type(url) ~= "string" then
1316-
errorf(2, "Bad type of 'url' argument. (Got %s)", type(url))
1317-
end
1335+
-- This mostly just encodes non-ASCII characters (but also e.g. spaces).
1336+
function _G.partialEncodeUrl(url)
1337+
--
1338+
-- :EnsureValidPercentEncoding
1339+
-- Make sure all existing percent signs denote encoded characters before we do
1340+
-- anything else. It's possible that this breaks some URLs in extremely rare
1341+
-- cases (if the given URL is a bit ambiguous), but we have to assume properly
1342+
-- percent-encoded characters already exist in many URLs.
1343+
--
1344+
url = url:gsub("%%([^%%]?[^%%]?)", function(hopefullyHex)
1345+
if not hopefullyHex:find"^%x%x$" then
1346+
return "%25"..hopefullyHex
1347+
end
1348+
end)
13181349

1319-
url = urlLib.escape(url)
1320-
url = url:gsub("%%%x%x", URI_PERCENT_CODES_TO_NOT_ENCODE)
1350+
return (urlLib.escape(url):gsub("%%%x%x", URI_PERCENT_CODES_TO_NOT_ENCODE))
1351+
end
13211352

1322-
return url
1353+
function _G.fixRelativeUrlAndEncode(url)
1354+
!ARGS "url:string"
1355+
url = url:gsub("^/%f[^/]", (site.baseUrl.v:gsub("^%w+://[^/]+", ""))) -- @Speed
1356+
return (partialEncodeUrl(url))
13231357
end
13241358

1325-
-- print(toUrl("http://www.example.com/some-path/File~With (Stuff_åäö).jpg?key=value&foo=bar#hash")) -- TEST
1326-
end
1359+
function _G.toAbsoluteAndEncodedUrl(url)
1360+
!ARGS "url:string"
1361+
url = url:gsub("^/%f[^/]", site.baseUrl.v)
1362+
return (partialEncodeUrl(url))
1363+
end
13271364

1328-
function _G.toUrlAbsolute(url)
1329-
url = url:gsub("^/%f[^/]", site.baseUrl.v)
1330-
return (toUrl(url))
1365+
--[[ TEST @Cleanup: Include this in testsite (and add more URL encoding tests).
1366+
_G.site = {baseUrl={v="http://example.com/sub-folder/"}}
1367+
local urlAbs = "http://example.com/some-path/File~With (Stuff_åäö).jpg?key=value&foo=bar#hash"
1368+
local urlRel = "/images/cat.jpg"
1369+
local result = percentEncode(urlAbs) ; print("PercentEncode", result) ; assert(result == "http%3a%2f%2fexample.com%2fsome-path%2fFile~With%20%28Stuff_%c3%a5%c3%a4%c3%b6%29.jpg%3fkey%3dvalue%26foo%3dbar%23hash")
1370+
local result = partialEncodeUrl(urlAbs) ; print("PartialEncode", result) ; assert(result == "http://example.com/some-path/File~With%20(Stuff_%c3%a5%c3%a4%c3%b6).jpg?key=value&foo=bar#hash")
1371+
local result = partialEncodeUrl("cool%20any%%20run") ; print("BadUrl ", result) ; assert(result == "cool%20any%25%20run")
1372+
local result = fixRelativeUrlAndEncode(urlRel) ; print("RelativeUrl ", result) ; assert(result == "/sub-folder/images/cat.jpg")
1373+
local result = toAbsoluteAndEncodedUrl(urlRel) ; print("AbsoluteUrl ", result) ; assert(result == "http://example.com/sub-folder/images/cat.jpg")
1374+
os.exit(2)
1375+
--]]
13311376
end
13321377

13331378
function _G.urlize(text)
1334-
text = text
1379+
!ARGS "text:string"
1380+
text = (
1381+
text
1382+
:gsub("[%p%s]+", "-")
1383+
:gsub("^%-+", "")
1384+
:gsub("%-+$", "")
13351385
:lower()
1336-
:gsub("[%p ]+", "-")
1337-
:gsub("^%-+", "")
1338-
:gsub("%-+$", "")
1339-
1386+
)
13401387
return text == "" and "-" or text
13411388
end
13421389

1343-
function _G.toPrettyUrl(url)
1344-
return (url
1390+
function _G.urlToPrettyText(url)
1391+
!ARGS "url:string"
1392+
return (
1393+
urlLib.unescape(url)
13451394
:gsub("^https?://", "")
1346-
:gsub("^www%.", "")
1347-
:gsub("/+$", "")
1395+
:gsub("^www%.", "")
1396+
:gsub("/+$", "")
1397+
:gsub(" ", "+") -- Because we unescaped all characters.
13481398
)
13491399
end
13501400

13511401

13521402

1353-
function _G.generatorMeta(hideVersion)
1403+
function _G.getGeneratorHtmlMetaTag(hideVersion)
13541404
return
13551405
hideVersion
13561406
and '<meta name="generator" content="LuaWebGen">'
@@ -1895,16 +1945,16 @@ function _G.generateRedirection(url, targetUrl, sourcePath)
18951945
<meta charset="utf-8">
18961946
<meta name="robots" content="noindex, follow">
18971947
<meta http-equiv="refresh" content="3; url=:urlAbsPercent:">
1898-
<title>:url:</title>
1948+
<title>:urlPretty:</title>
18991949
</head>
19001950
<body>
19011951
<p>Page has moved. If you are not redirected automatically,
19021952
click <a href=":urlAbsPercent:">here</a>.</p>
19031953
</body>
19041954
</html>
19051955
]=], {
1906-
url = encodeHtmlEntities( targetUrl ),
1907-
urlAbsPercent = encodeHtmlEntities(toUrlAbsolute(targetUrl)),
1956+
urlPretty = encodeHtmlEntities(urlToPrettyText (targetUrl)),
1957+
urlAbsPercent = encodeHtmlEntities(toAbsoluteAndEncodedUrl(targetUrl)),
19081958
}
19091959
)
19101960

@@ -2846,7 +2896,7 @@ do
28462896

28472897
local imageLoader = imageLoaders[extLower]
28482898
if not imageLoader then
2849-
return nil, F("Unknown image file format '%'.", extLower)
2899+
return nil, F("Unknown image file format '%s'.", extLower)
28502900
end
28512901

28522902
local image = imageLoader(pathImage)
@@ -2875,7 +2925,7 @@ function _G.getImageDimensions(pathImageRel, mustBeFast)
28752925
end
28762926

28772927
if mustBeFast then
2878-
return nil, F("Could not determine the dimensions of '%'.", maybeFullPath(DIR_CONTENT.."/"..pathImageRel))
2928+
return nil, F("Could not determine the dimensions of '%s'.", maybeFullPath(DIR_CONTENT.."/"..pathImageRel))
28792929
end
28802930

28812931
-- Try the much slower method using GD.

src/url.lua2p

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -12,17 +12,18 @@
1212

1313
--============================================================]]
1414

15+
!local PATTERN_RESERVED_CHAR = "[^-_.~A-Za-z0-9]" -- Or in other words, one byte that isn't an unreserved character.
16+
1517
local M = {}
1618

17-
-- Protect a path segment, to prevent it from interfering with the url parsing.
18-
local ALLOWED_SEGMENTS = {
19-
["-"]=true, ["_"]=true, ["."]=true, ["!"]=true, ["~"]=true, ["*"]=true, ["'"]=true, ["("]=true,
20-
[")"]=true, [":"]=true, ["@"]=true, ["&"]=true, ["="]=true, ["+"]=true, ["$"]=true, [","]=true,
21-
-- Plus alphanumeric chars.
22-
}
19+
-- Protect a path segment, to prevent it from interfering with the URL parsing.
20+
local ALLOWED_IN_SEGMENT = !({
21+
"!", "*", "'", "(", ")", ":", "@", "&", "=", "+", "$", ",",
22+
-- Plus unreserved characters...
23+
})
2324
local function protectSegment(s)
24-
return s:gsub("[^A-Za-z0-9_]", function(c)
25-
if not ALLOWED_SEGMENTS[c] then
25+
return s:gsub(!(PATTERN_RESERVED_CHAR), function(c)
26+
if not ALLOWED_IN_SEGMENT[c] then
2627
return string.format("%%%02x", c:byte())
2728
end
2829
end)
@@ -57,20 +58,20 @@ end
5758

5859
-- Encode a string into its escaped hexadecimal representation.
5960
function M.escape(binStr)
60-
return binStr:gsub("([^A-Za-z0-9_])", function(c)
61+
return (binStr:gsub(!(PATTERN_RESERVED_CHAR), function(c)
6162
return F("%%%02x", c:byte())
62-
end)
63+
end))
6364
end
6465

6566
-- Decode a string from its escaped hexadecimal representation.
66-
function M.unescape(binStr)
67-
return (binStr:gsub("%%(%x%x)", function(hex)
67+
function M.unescape(url)
68+
return (url:gsub("%%(%x%x)", function(hex)
6869
return string.char(tonumber(hex, 16))
6970
end))
7071
end
7172

7273
--
73-
-- Parse a url into a table with all its parts according to RFC 2396.
74+
-- Parse a URL into a table with all its parts according to RFC 2396.
7475
--
7576
-- The following grammar describes the names given to the URL parts:
7677
-- <url> ::= <scheme> :// <authority> / <path> ; <params> ? <query> # <fragment>

0 commit comments

Comments
 (0)