Skip to content

Commit 2f5686f

Browse files
committed
Add mhchem.lua filter.
1 parent 9d59a92 commit 2f5686f

File tree

5 files changed

+398
-0
lines changed

5 files changed

+398
-0
lines changed

mhchem/Makefile

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
DIFF ?= diff --strip-trailing-cr -u
2+
PANDOC ?= pandoc
3+
4+
test:
5+
@$(PANDOC) --lua-filter=mhchem.lua test.txt -o test.docx
6+
@rm -f test.native
7+
8+
.PHONY: test

mhchem/README.md

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# mhchem
2+
3+
[mhchem] is a widely-used LaTeX package for chemical notation.
4+
It is not understood natively by pandoc's LaTeX reader. This
5+
filter replaces any `\ce{}` commands in RawInline, RawBlock,
6+
and Math elements, with Math elements that can be reliably
7+
converted to other formats by pandoc.
8+
9+
## Usage
10+
11+
To convert a LaTeX document containing mhchem macros
12+
to docx, do
13+
14+
pandoc -f latex+raw_tex -L mhchem.lua input.tex -o output.docx
15+
16+
The `-f latex+raw_tex` part is essential; it ensures that
17+
bare `\ce{}` commands will be included in the pandoc AST as
18+
raw TeX, so that this filter can see them.
19+
20+
Related work:
21+
22+
- <https://github.com/mhchem/mhchemParser/>
23+

mhchem/mhchem.lua

Lines changed: 207 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,207 @@
1+
-- For better performance we put these functions in local variables:
2+
local P, S, R, Cf, Cc, Ct, V, Cs, Cg, Cb, B, C, Cmt =
3+
lpeg.P, lpeg.S, lpeg.R, lpeg.Cf, lpeg.Cc, lpeg.Ct, lpeg.V,
4+
lpeg.Cs, lpeg.Cg, lpeg.Cb, lpeg.B, lpeg.C, lpeg.Cmt
5+
6+
local whitespacechar = S(" \t\r\n")
7+
local number = (R"09"^1 * (P"." * R"09"^1)^-1)
8+
local symbol = C(S"()[],;") + (P"\\" * C(S"{}"))
9+
10+
local function escapeTeX(x)
11+
return x:gsub("%%","\\%")
12+
:gsub("\\","\\\\")
13+
:gsub("([{}])", "\\%1")
14+
end
15+
16+
local arrows = {
17+
["->"] = "\\longrightarrow",
18+
["<-"] = "\\longleftarrow",
19+
["<->"] = "\\longleftrightarrow",
20+
["<-->"] = "\\longleftrightarrow", -- for now; we don't have a longer arrow
21+
["<=>"] = "\\rightleftharpoons",
22+
["<=>>"] = "\\longRightleftharpoons",
23+
["<<=>"] = "\\longLeftrightharpoons"
24+
}
25+
26+
local bonds = {
27+
["-"] = "{-}",
28+
["="] = "{=}",
29+
["#"] = "{\\equiv}",
30+
["1"] = "{-}",
31+
["2"] = "{=}",
32+
["3"] = "{\\equiv}",
33+
["..."] = "{\\cdot}{\\cdot}{\\cdot}",
34+
["...."] = "{\\cdot}{\\cdot}{\\cdot}{\\cdot}",
35+
["->"] = "{\\rightarrow}",
36+
["<-"] = "{\\leftarrow}",
37+
["~"] = "{\\tripledash}",
38+
["~-"] = "{\\rlap{\\lower.1em{-}}\\raise.1em{\\tripledash}}",
39+
["~--"] = "{\\rlap{\\lower.2em{-}}\\rlap{\\raise.2em{\\tripledash}}-}",
40+
["~="] = "{\\rlap{\\lower.2em{-}}\\rlap{\\raise.2em{\\tripledash}}-}",
41+
["-~-"] = "{\\rlap{\\lower.2em{-}}\\rlap{\\raise.2em{-}}\\tripledash}"
42+
}
43+
44+
-- math mode renderer
45+
local render =
46+
{ str = function(x)
47+
if #x > 0 then
48+
return "\\mathrm{" .. escapeTeX(x) .. "}"
49+
else
50+
return ""
51+
end
52+
end,
53+
element = function(x) return "\\mathrm{" .. escapeTeX(x) .. "}" end,
54+
superscript = function(x) return "^{" .. x .. "}" end,
55+
subscript = function(x) return "_{" .. x .. "}" end,
56+
number = function(x) return x end,
57+
math = function(x) return x end,
58+
fraction = function(n,d) return "\\frac{" .. n .. "}{" .. d .. "}" end,
59+
fractionparens = function(n,d) return "(" .. n .. "/" .. d .. ")" end,
60+
greek = function(x) return "\\mathrm{" .. x .. "}" end,
61+
arrow = function(arr, above, below)
62+
local result = arrows[arr]
63+
if above then
64+
result = "\\overset{" .. above .. "}{" .. result .. "}"
65+
end
66+
if below then
67+
result = "\\underset{" .. below .. "}{" .. result .. "}"
68+
end
69+
return result
70+
end,
71+
precipitate = function() return "\\downarrow " end,
72+
gas = function() return "\\uparrow " end,
73+
circa = function() return "{\\sim}" end,
74+
grouped = function(...)
75+
return "{" .. table.concat(table.pack(...)) .. "}"
76+
end
77+
}
78+
79+
Mhchem = P{ "Formula",
80+
Formula = Ct( V"FormulaPart"^0 ) * P(-1) / table.concat;
81+
FormulaPart = V"Molecule"
82+
+ V"ReactionArrow"
83+
+ V"Bond"
84+
+ V"Sup"
85+
+ V"Sub"
86+
+ V"Charge"
87+
+ V"Fraction"
88+
+ V"Number"
89+
+ V"Math"
90+
+ V"Precipitate"
91+
+ V"Gas"
92+
+ V"Letters"
93+
+ V"GreekLetter"
94+
+ V"Circa"
95+
+ V"Text"
96+
+ V"EquationOp"
97+
+ V"Space"
98+
+ V"TeXCommand"
99+
+ V"Times"
100+
+ V"Symbol" ;
101+
102+
Molecule = V"StoichiometricNumber"^-1 * V"MoleculePart"^1 ;
103+
MoleculePart = (V"Element" + V"Group") * V"ElementSub"^-1 / render.grouped ;
104+
Group = Cs(P"(" * V"MoleculePart"^1 * P")") ;
105+
StoichiometricNumber = (V"Number" + C(R"az") + V"Math" + V"Fraction") *
106+
Cc("\\;") * whitespacechar^0 ;
107+
Element = C(R"AZ" * R"az"^0) / render.element ;
108+
Charge = B(R"AZ" + R"az" + S")]}") * (S"+-") * #-R"AZ" /
109+
render.superscript ;
110+
ElementSub = C(R"09"^1) / render.str / render.subscript ;
111+
Precipitate = whitespacechar^0 * (P"(v)" + P"v") * whitespacechar^0 /
112+
render.precipitate ;
113+
Gas = whitespacechar^0 * (P"(^)" + P"^") * whitespacechar^0 /
114+
render.gas ;
115+
Bond = (C(S"#=-") * #R"AZ" / bonds) +
116+
(P"\\bond{" * Cmt(C((P(1) - P"}")^0),
117+
function(subj,pos,capt)
118+
local b = bonds[capt]
119+
if b then
120+
return pos, b
121+
else
122+
return false
123+
end
124+
end) * P"}") ;
125+
Letters = R"az"^1 / render.str ;
126+
Number = C(number) / render.number;
127+
NumberOrLetter = V"Number" + V"Letters" ;
128+
Fraction = (P"(" * V"NumberOrLetter"^1 * P"/" * V"NumberOrLetter"^1 * P")"
129+
/ render.fractionparens) +
130+
(V"NumberOrLetter" * P"/" * V"NumberOrLetter" / render.fraction);
131+
Sup = P"^" * (V"InBracesSuper" +
132+
(C(S"+-"^-1 * R"09"^0 * S"+-"^-1) / render.str)) / render.superscript ;
133+
Sub = P"_" * (V"InBraces" + (C(S"+-"^-1 * R"09"^0 * S"+-"^-1) / render.str)) /
134+
render.subscript ;
135+
TeXCommand = C(P"\\" * (R"AZ" + R"az")^1 * whitespacechar^0 * V"InBraces"^0) ;
136+
Math = P"$" * Cs((V"MathPart" + V"CEPart")^1) * P"$" / render.math ;
137+
MathPart = C((P(1) - (P"$" + V"CEPart"))^1) ;
138+
CEPart = P"\\ce{" * Ct((V"FormulaPart" - P"}")^0) * P"}" / table.concat ;
139+
GreekLetter = C(P"\\" *
140+
(( P"alpha" + P"beta" + P"gamma" + P"delta" + P"epsilon" +
141+
P"zeta" + P"eta" + P"theta" + P"iota" + P"kappa" +
142+
P"mu" + P"nu" + P"xi" + P"omicron" + P"pi" + P"rho" + P"sigma" +
143+
P"tau" + P"upsilon" + P"phi" + P"xi" + P"psi" + P"omega"
144+
) +
145+
(( P"Alpha" + P"Beta" + P"Gamma" + P"Delta" + P"Epsilon" +
146+
P"Zeta" + P"Eta" + P"Theta" + P"Iota" + P"Kappa" +
147+
P"Mu" + P"Nu" + P"Xi" + P"Omicron" + P"Pi" + P"Rho" + P"Sigma" +
148+
P"Tau" + P"Upsilon" + P"Phi" + P"Xi" + P"Psi" + P"Omega" )))) *
149+
whitespacechar^0 / render.greek ;
150+
EquationOp = whitespacechar^0 *
151+
C(P"+" + P"-" + P"=" + (P"\\pm")) *
152+
whitespacechar^0 /
153+
render.math;
154+
ReactionArrow =
155+
whitespacechar^0 *
156+
C(P"->" +
157+
P"<-->" +
158+
P"<->" +
159+
P"<-" +
160+
P"<=>>" +
161+
P"<=>" +
162+
P"<<=>") *
163+
(P"[" * Cs((V"FormulaPart" - P"]")^0) * P"]")^-2 *
164+
whitespacechar^0 / render.arrow ;
165+
Text = V"InBraces" ;
166+
Circa = P"\\ca" * whitespacechar^0 / render.circa ;
167+
Space = C(whitespacechar^1) / "~" ;
168+
Times = S".*" / "\\cdot " ;
169+
Symbol = symbol / render.str;
170+
InBraces = P"{" * Ct((((V"FormulaPart" - S"{}")^1) + V"InBraces")^0) * P"}" /
171+
table.concat ;
172+
InBracesSuper = P"{"
173+
* Ct(((( ((P"." / "\\bullet ") + V"FormulaPart") - S"{}")^1)
174+
+ V"InBraces")^0)
175+
* P"}" / table.concat
176+
}
177+
178+
function handleCe(s)
179+
local inner = s:sub(5,-2) -- strip off \ce{ and }
180+
local result = lpeg.match(Mhchem, inner)
181+
if not result then
182+
io.stderr:write("Could not parse mhchem formula " .. inner .. "\n")
183+
return "\\text{Could not parse}"
184+
end
185+
return result
186+
end
187+
188+
function RawInline(el)
189+
if (el.format == "latex" or el.format == "tex") and
190+
el.text:match("\\ce{") then
191+
local result = handleCe(el.text)
192+
if result then
193+
return pandoc.Math("InlineMath", handleCe(el.text))
194+
end
195+
end
196+
end
197+
198+
function RawBlock(el)
199+
local il = RawInline(el)
200+
if il then
201+
return pandoc.Para(il)
202+
end
203+
end
204+
205+
function Math(el)
206+
el.text = string.gsub(el.text, "(\\ce%b{})", handleCe)
207+
end

mhchem/test.docx

12.6 KB
Binary file not shown.

0 commit comments

Comments
 (0)