Skip to content

Commit 8ebbe09

Browse files
authored
wordcount: adds process-anyway option (#162)
In standalone mode, setting a `wordcount` variable to `process-anyway` allows the pandoc process to continue in addition to counting words.
1 parent 51aa0d9 commit 8ebbe09

File tree

2 files changed

+58
-29
lines changed

2 files changed

+58
-29
lines changed

wordcount/README.md

Lines changed: 24 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,29 @@
11
# wordcount
22

3-
This filter counts the words and characters in the body of a document (omitting
4-
metadata like titles and abstracts), including words in code.
5-
It should be more accurate than `wc -w` or `wc -m` run directly on a
6-
Markdown document, since `wc` will also count markup
7-
characters, like the `#` in front of an ATX header, or
8-
tags in HTML documents.
3+
This filter counts the words and characters in the body of a document
4+
(omitting metadata like titles and abstracts), including words in
5+
code. It should be more accurate than `wc -w` or `wc -m` run directly
6+
on a Markdown document, since `wc` will also count markup characters,
7+
like the `#` in front of an ATX header, or tags in HTML documents.
98

109
To run it, `pandoc --lua-filter wordcount.lua myfile.md`.
1110
The word count will be printed to stdout.
11+
12+
If you want to process the document as well as print the word count,
13+
set the metadata variable `wordcount` to `process` (or `process-anyway` or `convert`).
14+
This works only in conjunction with the standalone document option (`-s`).
15+
This can be done through the command line:
16+
17+
```
18+
pandoc -s -L wordcount.lua -M wordcount=process sample.md -o output.html
19+
```
20+
21+
Or the document's metadata block:
22+
23+
```
24+
---
25+
title: My Long Book
26+
wordcount: process-anyway
27+
---
28+
```
29+

wordcount/wordcount.lua

Lines changed: 34 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,45 +1,56 @@
1-
-- counts words in a document
1+
-- counts words in a document
22

3-
words = 0
3+
words = 0
44
characters = 0
55
characters_and_spaces = 0
6+
process_anyway = false
67

7-
wordcount = {
8-
Str = function(el)
9-
-- we don't count a word if it's entirely punctuation:
10-
if el.text:match("%P") then
11-
words = words + 1
12-
end
8+
wordcount = {
9+
Str = function(el)
10+
-- we don't count a word if it's entirely punctuation:
11+
if el.text:match("%P") then
12+
words = words + 1
13+
end
1314
characters = characters + utf8.len(el.text)
1415
characters_and_spaces = characters_and_spaces + utf8.len(el.text)
15-
end,
16+
end,
1617

1718
Space = function(el)
1819
characters_and_spaces = characters_and_spaces + 1
1920
end,
2021

21-
Code = function(el)
22-
_,n = el.text:gsub("%S+","")
23-
words = words + n
22+
Code = function(el)
23+
_,n = el.text:gsub("%S+","")
24+
words = words + n
2425
text_nospace = el.text:gsub("%s", "")
2526
characters = characters + utf8.len(text_nospace)
2627
characters_and_spaces = characters_and_spaces + utf8.len(el.text)
27-
end,
28+
end,
2829

29-
CodeBlock = function(el)
30-
_,n = el.text:gsub("%S+","")
31-
words = words + n
30+
CodeBlock = function(el)
31+
_,n = el.text:gsub("%S+","")
32+
words = words + n
3233
text_nospace = el.text:gsub("%s", "")
3334
characters = characters + utf8.len(text_nospace)
3435
characters_and_spaces = characters_and_spaces + utf8.len(el.text)
35-
end
36-
}
36+
end
37+
}
38+
39+
-- check if the `wordcount` metadata variable is set to `process-anyway`, `process` or `convert`
40+
function Meta(meta)
41+
if meta.wordcount and (meta.wordcount=="process-anyway"
42+
or meta.wordcount=="process" or meta.wordcount=="convert") then
43+
process_anyway = true
44+
end
45+
end
3746

38-
function Pandoc(el)
39-
-- skip metadata, just count body:
40-
pandoc.walk_block(pandoc.Div(el.blocks), wordcount)
41-
print(words .. " words in body")
47+
function Pandoc(el)
48+
-- skip metadata, just count body:
49+
pandoc.walk_block(pandoc.Div(el.blocks), wordcount)
50+
print(words .. " words in body")
4251
print(characters .. " characters in body")
4352
print(characters_and_spaces .. " characters in body (including spaces)")
44-
os.exit(0)
53+
if not process_anyway then
54+
os.exit(0)
55+
end
4556
end

0 commit comments

Comments
 (0)