File tree Expand file tree Collapse file tree 3 files changed +22
-4
lines changed Expand file tree Collapse file tree 3 files changed +22
-4
lines changed Original file line number Diff line number Diff line change 1
1
# wordcount
2
2
3
- This filter counts the words in the body of a document (omitting
3
+ This filter counts the words and characters in the body of a document (omitting
4
4
metadata like titles and abstracts), including words in code.
5
- It should be more accurate than ` wc -w ` run directly on a
6
- Markdown document, since the latter will count markup
5
+ It should be more accurate than ` wc -w ` or ` wc -m ` run directly on a
6
+ Markdown document, since ` wc ` will also count markup
7
7
characters, like the ` # ` in front of an ATX header, or
8
- tags in HTML documents, as words .
8
+ tags in HTML documents.
9
9
10
10
To run it, ` pandoc --lua-filter wordcount.lua myfile.md ` .
11
11
The word and character counts will be printed to stdout.
Original file line number Diff line number Diff line change 1
1
15 words in body
2
+ 68 characters in body
3
+ 79 characters in body (including spaces)
Original file line number Diff line number Diff line change 1
1
-- counts words and characters in a document

-- Running totals, updated by the filter callbacks while the body is walked:
--   words                  number of words seen so far
--   characters             number of characters, whitespace excluded
--   characters_and_spaces  number of characters, whitespace included
words, characters, characters_and_spaces = 0, 0, 0
4
6
5
7
-- Length of `s` in Unicode code points.
-- utf8.len returns nil when `s` is not valid UTF-8; fall back to the byte
-- length in that case so the counters never receive nil.
local function char_len(s)
  return utf8.len(s) or #s
end

-- Adds the contents of a code element (anything with a `text` field) to the
-- running totals: every run of non-whitespace is one word, `characters`
-- excludes whitespace, `characters_and_spaces` includes it.
local function count_code(el)
  -- the second result of gsub is the number of matches, i.e. the token count
  local _, tokens = el.text:gsub("%S+", "")
  words = words + tokens
  -- keep only the first result of gsub (the string with whitespace removed)
  local stripped = el.text:gsub("%s", "")
  characters = characters + char_len(stripped)
  characters_and_spaces = characters_and_spaces + char_len(el.text)
end

-- Filter table applied to the document body. Each callback only updates the
-- global counters `words`, `characters` and `characters_and_spaces`; none of
-- them returns a value.
-- NOTE(review): only Space is handled as inter-word whitespace here; confirm
-- whether SoftBreak/LineBreak should also add to characters_and_spaces.
wordcount = {
  Str = function (el)
    -- we don't count a word if it's entirely punctuation:
    if el.text:match("%P") then
      words = words + 1
    end
    local len = char_len(el.text)
    characters = characters + len
    characters_and_spaces = characters_and_spaces + len
  end,

  Space = function (el)
    -- an inter-word space counts as exactly one character
    characters_and_spaces = characters_and_spaces + 1
  end,

  Code = count_code,

  CodeBlock = count_code
}
23
37
24
38
-- Document-level entry point: walks the body with the `wordcount` filter,
-- prints the three totals to stdout, then exits with status 0.
function Pandoc (el)
  -- skip metadata, just count body: wrap the body blocks in a single Div
  -- and walk only that.
  local body = pandoc.Div(el.blocks)
  pandoc.walk_block(body, wordcount)

  print(words .. " words in body")
  print(characters .. " characters in body")
  print(characters_and_spaces .. " characters in body (including spaces)")

  os.exit(0)
end
You can’t perform that action at this time.
0 commit comments