@@ -91,7 +91,8 @@ local HTML_END_TAG_PATTERNS_FOR_TAGS_WITH_UNENCODED_CONTENTS = {
9191
9292
9393local xml = {
94- htmlAllowNoAttributeValue = true, -- @Doc (Maybe this should be an argument for toHtml()?)
94+ htmlAllowNoAttributeValue = true, -- @Doc (Maybe this should be an argument for toHtml()?)
95+ htmlScrambleEmailAddresses = true, -- @Doc (Maybe this should be an argument for toHtml()?)
9596}
9697xml.__index = xml
9798
128129local function insertNode(el, node)
129130 if node == "" then
130131 -- void
131- elseif !!( IS_TEXT` node` ) and !!( IS_TEXT` el[#el]` ) then
132+ elseif @@ IS_TEXT( node) and @@ IS_TEXT( el[#el]) then
132133 el[#el] = el[#el] .. node -- @Speed
133134 else
134135 table.insert(el, node)
@@ -697,7 +698,7 @@ xml.set_attribs = xml.updateAttributes -- :PenlightCompatibility
697698do
698699 local function _getText(buffer, el)
699700 for _, childNode in ipairs(el) do
700- if !!( IS_TEXT` childNode` ) then
701+ if @@ IS_TEXT( childNode) then
701702 table.insert(buffer, childNode)
702703 else
703704 _getText(buffer, childNode)
716717function xml.getTextOfDirectChildren(el) -- Returns one string made of el's direct children for which IS_TEXT holds; other children are skipped, not descended into.
717718 local buffer = {}
718719 for _, childNode in ipairs(el) do
719- if !!( IS_TEXT` childNode` ) then table.insert(buffer, childNode) end
720+ if @@ IS_TEXT( childNode) then table.insert(buffer, childNode) end
720721 end
721722 return table.concat(buffer) -- Pieces are joined in document order; the result is "" when nothing was collected.
722723end
732733 end
733734
734735 for _, childNode in ipairs(el) do
735- if !!( IS_TEXT` childNode` ) then
736+ if @@ IS_TEXT( childNode) then
736737 table.insert(buffer, childNode)
737738 else
738739 _getHtmlText(buffer, childNode)
800801 if node == nil then node = "" end
801802
802803 -- Attribute string matching is straight equality, except if the pattern is a $ capture, which always succeeds.
803- if !!( IS_TEXT` node` ) then
804- if not !!( IS_TEXT` patEl` ) then return false end
804+ if @@ IS_TEXT( node) then
805+ if not @@ IS_TEXT( patEl) then return false end
805806
806807 -- print(node, patEl) -- DEBUG
807808
854855
855856 local function advanceElement()
856857 elChildI = elChildI + 1 -- Next child element of data.
857- while el[elChildI] and !!( IS_TEXT` el[elChildI]` ) do
858+ while el[elChildI] and @@ IS_TEXT( el[elChildI]) do
858859 elChildI = elChildI + 1
859860 end
860861 return elChildI <= #el
917918
918919 xml.walk(patEl, false, function(tagName, currentEl)
919920 if
920- !!( IS_TEXT` currentEl[1]` ) and xml.isElement(currentEl[2]) and !!( IS_TEXT` currentEl[3]` )
921+ @@ IS_TEXT( currentEl[1]) and xml.isElement(currentEl[2]) and @@ IS_TEXT( currentEl[3])
921922 and currentEl[1]:find"%s*{{" and currentEl[3]:find"}}%s*"
922923 then
923924 table.remove(currentEl, 3)
@@ -1087,7 +1088,7 @@ local function shouldEncodeAsCdata(s)
10871088end
10881089
10891090local function nodeToXml(buffer, node)
1090- if !!( IS_TEXT` node` ) then
1091+ if @@ IS_TEXT( node) then
10911092 if shouldEncodeAsCdata(node) then
10921093 table.insert(buffer, "<![CDATA[")
10931094 table.insert(buffer, node)
@@ -1140,7 +1141,7 @@ end
11401141
11411142-- xmlString = xml.contentsToXml( node )
11421143function xml.contentsToXml(node)
1143- if !!( IS_TEXT` node` ) then return "" end
1144+ if @@ IS_TEXT( node) then return "" end
11441145
11451146 local buffer = {}
11461147 for _, childNode in ipairs(node) do
@@ -1152,7 +1153,7 @@ end
11521153
11531154
11541155local function nodeToXmlPretty(buffer, node, initialIndent,indent,attrIndent, indentTags)
1155- if !!( IS_TEXT` node` ) then
1156+ if @@ IS_TEXT( node) then
11561157 if not node:find"%S" then
11571158 -- void
11581159 elseif shouldEncodeAsCdata(node) then
@@ -1245,9 +1246,52 @@ xml.__tostring = xml.toPrettyXml -- :PenlightCompatibility
12451246
12461247
12471248
1248- local function nodeToHtml(buffer, node)
1249- if !!(IS_TEXT`node`) then
1250- table.insert(buffer, xml.encodeMoreEntities(node))
1249+ local function encodeEmailValue(buffer, s)
1250+ -- Append an obfuscated copy of the string s to the array buffer, one entry per byte of s. Nothing is returned.
1251+ -- Encode characters as a mix of decimal and hexadecimal entities to
1252+ -- increase the chance of fooling address-harvesting bots.
1253+ -- NOTE(review): s is walked byte by byte, so a multi-byte UTF-8 character would be emitted as one entity per byte - confirm callers only pass ASCII here.
1254+ -- We use a deterministic encoding to make unit testing possible.
1255+ -- We encode roughly 40% hex, 40% dec, 20% plain.
1256+ -- Each encoder gains its 'rate' on every byte; the encoder with the highest count is used and then pays 1.
1257+ -- This algorithm comes from Markdown.pl by John Gruber and was based on a
1258+ -- filter that Matthew Wickline wrote on some mailing list in the ancient
1259+ -- times. Isn't programming fun?
1260+ -- The encoder tables below are rebuilt on every call, so each call starts from the same counts.
1261+ local encoderHex = {count=1, rate=.40, encode=function(c) return F("&#x%x;", c:byte()) end} -- Starts at 1, so hex has the highest count on the first byte. F is presumably a string.format alias - verify.
1262+ local encoderDec = {count=0, rate=.40, encode=function(c) return F("&#%d;", c:byte()) end}
1263+ local encoderPlain = {count=0, rate=.20, encode=function(c) return c end}
1264+ local encoders = {encoderHex, encoderDec, encoderPlain}
1265+
1266+ for pos = 1, #s do
1267+ for _, encoder in ipairs(encoders) do
1268+ encoder.count = encoder.count + encoder.rate
1269+ end
1270+
1271+ if encoders[2].count > encoders[1].count then encoders[1], encoders[2] = encoders[2], encoders[1] end -- These three compare-and-swap steps order the list by count, highest first.
1272+ if encoders[3].count > encoders[2].count then encoders[2], encoders[3] = encoders[3], encoders[2] end
1273+ if encoders[2].count > encoders[1].count then encoders[1], encoders[2] = encoders[2], encoders[1] end
1274+
1275+ local encoder = encoders[1] -- The encoder with the highest count.
1276+ local c = s:sub(pos, pos)
1277+
1278+ -- Force encoding of "@" to make the address less visible. The characters & < > " ' are forced too, so none of the six is ever written out raw. When plain was selected it is encoders[1], so encoders[2] is the hex or dec encoder.
1279+ if @@CONSTSET{"@","&","<",">",'"',"'"}[c] and encoder == encoderPlain then encoder = encoders[2] end
1280+
1281+ table.insert(buffer, encoder.encode(c))
1282+ encoder.count = encoder.count - 1 -- The encoder that was actually used pays for this byte.
1283+ end
1284+ end
1285+
1286+ local function nodeToHtml(buffer, node, encodeTextAsEmail)
1287+ if @@IS_TEXT(node) then
1288+ if encodeTextAsEmail and node:find("@", 1, true) then
1289+ -- We just assume the text is an e-mail address. It's possible
1290+ -- it's not. It's also possible the node is just "@".
1291+ encodeEmailValue(buffer, node)
1292+ else
1293+ table.insert(buffer, xml.encodeMoreEntities(node))
1294+ end
12511295 return
12521296 end
12531297
@@ -1270,7 +1314,12 @@ local function nodeToHtml(buffer, node)
12701314
12711315 if not (attrValue == "" and allowNoAttrValue) then
12721316 table.insert(buffer, '="')
1273- table.insert(buffer, xml.encodeMoreEntities(attrValue))
1317+ if attrName == "href" and el.tag == "a" and attrValue:find"^[Mm][Aa][Ii][Ll][Tt][Oo]:" then
1318+ encodeEmailValue(buffer, attrValue)
1319+ encodeTextAsEmail = xml.htmlScrambleEmailAddresses
1320+ else
1321+ table.insert(buffer, xml.encodeMoreEntities(attrValue))
1322+ end
12741323 table.insert(buffer, '"')
12751324 end
12761325 end
@@ -1288,7 +1337,7 @@ local function nodeToHtml(buffer, node)
12881337 end
12891338 else
12901339 for _, childNode in ipairs(el) do
1291- nodeToHtml(buffer, childNode)
1340+ nodeToHtml(buffer, childNode, encodeTextAsEmail )
12921341 end
12931342 end
12941343 end
@@ -1309,17 +1358,17 @@ function xml.toHtml(node, preface)
13091358 if preface then
13101359 buffer[1] = (type(preface) == "string") and preface or !(HTML_STANDARD_PREFACE.."\n")
13111360 end
1312- nodeToHtml(buffer, node)
1361+ nodeToHtml(buffer, node, false )
13131362 return table.concat(buffer, "")
13141363end
13151364
13161365-- htmlString = xml.contentsToHtml( node ) -- Serializes the children of node (node itself is not passed to nodeToHtml) and returns the joined string. A node for which IS_TEXT holds yields "".
13171366function xml.contentsToHtml(node)
1318- if !!( IS_TEXT` node` ) then return "" end
1367+ if @@ IS_TEXT( node) then return "" end
13191368
13201369 local buffer = {}
13211370 for _, childNode in ipairs(node) do
1322- nodeToHtml(buffer, childNode)
1371+ nodeToHtml(buffer, childNode, false ) -- Each child starts with encodeTextAsEmail off.
13231372 end
13241373 return table.concat(buffer, "")
13251374end
@@ -1345,7 +1394,7 @@ function xml.parseXml(s, pathForError)
13451394 end
13461395
13471396 for _, node in ipairs(docWrapper) do
1348- if not !!( IS_TEXT` node` ) then
1397+ if not @@ IS_TEXT( node) then
13491398 if doc then
13501399 fileError(pathForError, s, #s+1, "There are multiple root elements.")
13511400 end
@@ -1384,7 +1433,7 @@ function xml.parseHtml(s, pathForError)
13841433 end
13851434
13861435 for _, node in ipairs(docWrapper) do
1387- if not !!( IS_TEXT` node` ) then
1436+ if not @@ IS_TEXT( node) then
13881437 if doc then
13891438 fileError(pathForError, s, #s, "There are multiple root elements.")
13901439 end
@@ -1527,7 +1576,7 @@ function xml.element(tagName, prototype)
15271576 if not prototype then
15281577 -- void
15291578
1530- elseif !!( IS_TEXT` prototype` ) or xml.isElement(prototype) then
1579+ elseif @@ IS_TEXT( prototype) or xml.isElement(prototype) then
15311580 el[1] = prototype
15321581
15331582 else
@@ -1548,7 +1597,7 @@ xml.elem = xml.element -- :PenlightCompatibility
15481597
15491598do
15501599 local function _clone(node, textSubstCallback, parentEl)
1551- if !!( IS_TEXT` node` ) then
1600+ if @@ IS_TEXT( node) then
15521601 if textSubstCallback then
15531602 node = textSubstCallback(node, "*TEXT", parentEl) or errorf("No value returned from text substitution callback for '*TEXT'.")
15541603 end
@@ -1595,7 +1644,7 @@ end
15951644-- nodesLookEqual = xml.compare( value1, value2 )
15961645-- Returns false if any value is not a node.
15971646function xml.compare(v1, v2)
1598- if !!( IS_TEXT`v1` ) and !!( IS_TEXT`v2` ) then
1647+ if @@ IS_TEXT(v1 ) and @@ IS_TEXT(v2 ) then
15991648 return v1 == v2
16001649 end
16011650 if not (xml.isElement(v1) and xml.isElement(v2)) then
0 commit comments