Skip to content

Commit 7b3ce9a

Browse files
Update the Unicode-to-LaTeX character map so that it no longer overwrites ASCII characters that are already handled
1 parent 8ea6180 commit 7b3ce9a

File tree

2 files changed

+60
-71
lines changed

2 files changed

+60
-71
lines changed

R/helpers.R

Lines changed: 30 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -3000,20 +3000,6 @@ latex_special_chars <- c(
30003000
## Copied from https://github.com/phfaist/pylatexenc/blob/6dc2ce7fcd89b7cd1536c79c800f49f09535f5e9/pylatexenc/latexencode/_uni2latexmap.py
30013001

30023002
latex_unicode_chars <- c(
3003-
"\U0022"='"', # character ",
3004-
"\U0023"='\\#', # character #",
3005-
"\U0024"='\\$', # character $",
3006-
"\U0025"='\\%', # character %",
3007-
"\U0026"='\\&', # character &",
3008-
"\U003C"='\\ensuremath{<}', # <",
3009-
"\U003E"='\\ensuremath{>}', # >",
3010-
"\U005C"='\\textbackslash', # the \\ character itself",
3011-
"\U005E"='\\textasciicircum', # character ^",
3012-
"\U005F"='\\_', # character _",
3013-
"\U007B"='\\{', # character {",
3014-
"\U007D"='\\}', # character }",
3015-
"\U007E"='\\textasciitilde', # character ~",
3016-
"\U00A0"='~', # character NO-BREAK SPACE",
30173003
"\U00A1"='\\textexclamdown', # character ¡",
30183004
"\U00A2"='\\textcent', # character ¢",
30193005
"\U00A3"='\\textsterling', # character £",
@@ -3026,7 +3012,6 @@ latex_unicode_chars <- c(
30263012
"\U00AA"='\\textordfeminine', # character ª",
30273013
"\U00AB"='\\guillemotleft', # character «",
30283014
"\U00AC"='\\textlnot', # character ¬",
3029-
"\U00AD"='\\-', # SOFT HYPHEN [­]",
30303015
"\U00AE"='\\textregistered', # character ®",
30313016
"\U00AF"='\\textasciimacron', # character ¯",
30323017
"\U00B0"='\\textdegree', # character °",
@@ -3260,8 +3245,6 @@ latex_unicode_chars <- c(
32603245
"\U02DC"='\\textasciitilde',
32613246
"\U02DD"='\\textacutedbl', # 0x02DD",
32623247

3263-
"\U02BC"="'", # MODIFIER LETTER APOSTROPHE",
3264-
32653248
"\U0307"='\\ensuremath{\\dot{}}',
32663249
"\U0308"='\\ensuremath{\\ddot{}}',
32673250

@@ -3277,24 +3260,24 @@ latex_unicode_chars <- c(
32773260
"\U0392"='B', # GREEK CAPITAL LETTER BETA",
32783261
"\U0393"='\\ensuremath{\\Gamma}', # GREEK CAPITAL LETTER GAMMA",
32793262
"\U0394"='\\ensuremath{\\Delta}', # ...",
3280-
"\U0395"='E',
3281-
"\U0396"='Z',
3282-
"\U0397"='H',
3263+
# "\U0395"='E',
3264+
# "\U0396"='Z',
3265+
# "\U0397"='H',
32833266
"\U0398"='\\ensuremath{\\Theta}',
3284-
"\U0399"='I',
3285-
"\U039A"='K',
3267+
# "\U0399"='I',
3268+
# "\U039A"='K',
32863269
"\U039B"='\\ensuremath{\\Lambda}',
3287-
"\U039C"='M',
3288-
"\U039D"='N',
3270+
# "\U039C"='M',
3271+
# "\U039D"='N',
32893272
"\U039E"='\\ensuremath{\\Xi}',
3290-
"\U039F"='O',
3273+
# "\U039F"='O',
32913274
"\U03A0"='\\ensuremath{\\Pi}',
3292-
"\U03A1"='P',
3275+
# "\U03A1"='P',
32933276
"\U03A3"='\\ensuremath{\\Sigma}',
3294-
"\U03A4"='T',
3277+
# "\U03A4"='T',
32953278
"\U03A5"='\\ensuremath{\\Upsilon}',
32963279
"\U03A6"='\\ensuremath{\\Phi}',
3297-
"\U03A7"='X',
3280+
# "\U03A7"='X',
32983281
"\U03A8"='\\ensuremath{\\Psi}',
32993282
"\U03A9"='\\ensuremath{\\Omega}',
33003283

@@ -3608,9 +3591,9 @@ latex_unicode_chars <- c(
36083591

36093592
"\U200C"='\\textcompwordmark', # ZERO WIDTH NON-JOINER",
36103593

3611-
"\U2010"='-', # HYPHEN",
3594+
# "\U2010"='-', # HYPHEN",
36123595
"\U2011"='\\nobreakdash-', # NON-BREAKING HYPHEN, https://tex.stackexchange.com/a/330437/32188",
3613-
"\U2012"='-', # FIGURE DASH",
3596+
# "\U2012"='-', # FIGURE DASH",
36143597
"\U2013"='\\textendash', # 0x2013",
36153598
"\U2014"='\\textemdash',
36163599
"\U2015"='\\textemdash', # HORIZONTAL BAR",
@@ -3624,14 +3607,14 @@ latex_unicode_chars <- c(
36243607
"\U2020"='\\textdagger',
36253608
"\U2021"='\\textdaggerdbl',
36263609
"\U2022"='\\textbullet',
3627-
"\U2024"='.', # ONE DOT LEADER [�\u0080�]",
3628-
"\U2025"='..', # TWO DOT LEADER [�\u0080�]",
3610+
# "\U2024"='.', # ONE DOT LEADER [․]",
3611+
# "\U2025"='..', # TWO DOT LEADER [‥]",
36293612
"\U2026"='\\textellipsis',
36303613
"\U2030"='\\textperthousand',
36313614
"\U2031"='\\textpertenthousand',
3632-
"\U2032: \"'\", # PRIME [�\u0080�]",
3633-
"\U2033: \"''\", # DOUBLE PRIME [�\u0080�]",
3634-
"\U2034: \"'''\", # TRIPLE PRIME [�\u0080�]",
3615+
"\U2032"="'", # PRIME [′]",
3616+
"\U2033"="''", # DOUBLE PRIME [″]",
3617+
"\U2034"="'''", # TRIPLE PRIME [‴]",
36353618
"\U2035"='\\ensuremath{\\backprime}', # REVERSED PRIME [‵]",
36363619
"\U2039"='\\guilsinglleft',
36373620
"\U203A"='\\guilsinglright',
@@ -3640,11 +3623,11 @@ latex_unicode_chars <- c(
36403623
"\U2044"='\\textfractionsolidus',
36413624
"\U204E"='\\textasteriskcentered',
36423625
"\U2052"='\\textdiscount', # 0x2052",
3643-
"\U2057: \"''''\", # QUADRUPLE PRIME [�\u0081\u0097]",
3626+
"\U2057"="''''", # QUADRUPLE PRIME [⁗]",
36443627

36453628
"\U205F"='\\hspace{0.22em}', # MEDIUM MATHEMATICAL SPACE [�\u0081\u009f]",
36463629
"\U2060"='\\nolinebreak', # WORD JOINER [�\u0081�]",
3647-
"\U2061"='', # FUNCTION APPLICATION",
3630+
# "\U2061"='', # FUNCTION APPLICATION",
36483631

36493632
"\U20A1"='\\textcolonmonetary', # 0x20A1",
36503633
"\U20A4"='\\textlira',
@@ -3828,8 +3811,8 @@ latex_unicode_chars <- c(
38283811
#"\U2233"=NA, #ANTICLOCKWISE CONTOUR INTEGRAL
38293812
"\U2234"='\\ensuremath{\\therefore}',
38303813
"\U2235"='\\ensuremath{\\because}',
3831-
"\U2236"='\\ensuremath{:}',
3832-
"\U2237"='\\ensuremath{::}',
3814+
# "\U2236"='\\ensuremath{:}',
3815+
# "\U2237"='\\ensuremath{::}',
38333816

38343817
"\U223A"='\\ensuremath{\\mathbin{{:}\\!\\!{-}\\!\\!{:}}}', # GEOMETRIC PROPORTION [∺]",
38353818
"\U223B"='\\ensuremath{\\homothetic}', # HOMOTHETIC [∻]",
@@ -4143,13 +4126,19 @@ escape_latex <- function(text) {
41434126

41444127
regmatches(text[!na_text], m) <- escaped_chars
41454128

4146-
m2 <- gregexpr(paste0("[",paste0(names(latex_unicode_chars), collapse = ""),"]"), text[!na_text], perl = TRUE)
4129+
m2 <- gregexpr(paste0("[",paste0(names(latex_unicode_chars), collapse = "|"),"]"), text[!na_text], perl = TRUE)
41474130

41484131
unicode_chars <- regmatches(text[!na_text], m2)
41494132

4133+
# browser()
4134+
41504135
latex_unicode <-
41514136
lapply(unicode_chars, function(x) {
4152-
latex_unicode_chars[x]
4137+
new_var <- latex_unicode_chars[x]
4138+
if(length(new_var) > 0){
4139+
x[!is.na(new_var)] <- new_var[!is.na(new_var)]
4140+
}
4141+
x
41534142
})
41544143

41554144
regmatches(text[!na_text], m2) <- latex_unicode

tests/testthat/test-l_cols_merge.R

Lines changed: 30 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -194,16 +194,16 @@ test_that("cols_merge_range() works correctly", {
194194
expect_match(
195195
as_latex(tbl_latex) %>% as.character(),
196196
paste0(
197-
".*767.6–928.1 & 382.0 & 674.5",
198-
".*403.3–461.5 & 15.1 & 242.8",
199-
".*686.4–54.1 & 282.7 & 56.3",
200-
".*662.6–148.8 & 984.6 & 928.1",
201-
".*198.5–65.1 & 127.4 & 219.3",
202-
".*132.1–118.1 & 91.2 & 874.3",
203-
".*349.7–307.1 & 566.7 & 542.9",
204-
".*63.7–504.3 & 152.0 & 724.5",
205-
".*105.4–729.8 & 962.4 & 336.4",
206-
".*924.2–424.6 & 740.8 & 104.2.*"
197+
".*767.6\\\\textendash928.1 & 382.0 & 674.5",
198+
".*403.3\\\\textendash461.5 & 15.1 & 242.8",
199+
".*686.4\\\\textendash54.1 & 282.7 & 56.3",
200+
".*662.6\\\\textendash148.8 & 984.6 & 928.1",
201+
".*198.5\\\\textendash65.1 & 127.4 & 219.3",
202+
".*132.1\\\\textendash118.1 & 91.2 & 874.3",
203+
".*349.7\\\\textendash307.1 & 566.7 & 542.9",
204+
".*63.7\\\\textendash504.3 & 152.0 & 724.5",
205+
".*105.4\\\\textendash729.8 & 962.4 & 336.4",
206+
".*924.2\\\\textendash424.6 & 740.8 & 104.2.*"
207207
)
208208
)
209209

@@ -221,16 +221,16 @@ test_that("cols_merge_range() works correctly", {
221221
expect_match(
222222
as_latex(tbl_latex) %>% as.character(),
223223
paste0(
224-
".*767.6–928.1 & 382.0 & 674.5",
225-
".*403.3–461.5 & 15.1 & 242.8",
226-
".*686.4–54.1 & 282.7 & 56.3",
227-
".*662.6–148.8 & 984.6 & 928.1",
228-
".*198.5–65.1 & 127.4 & 219.3",
229-
".*132.1–118.1 & 91.2 & 874.3",
230-
".*349.7–307.1 & 566.7 & 542.9",
231-
".*63.7–504.3 & 152.0 & 724.5",
232-
".*105.4–729.8 & 962.4 & 336.4",
233-
".*924.2–424.6 & 740.8 & 104.2.*"
224+
".*767.6\\\\textendash928.1 & 382.0 & 674.5",
225+
".*403.3\\\\textendash461.5 & 15.1 & 242.8",
226+
".*686.4\\\\textendash54.1 & 282.7 & 56.3",
227+
".*662.6\\\\textendash148.8 & 984.6 & 928.1",
228+
".*198.5\\\\textendash65.1 & 127.4 & 219.3",
229+
".*132.1\\\\textendash118.1 & 91.2 & 874.3",
230+
".*349.7\\\\textendash307.1 & 566.7 & 542.9",
231+
".*63.7\\\\textendash504.3 & 152.0 & 724.5",
232+
".*105.4\\\\textendash729.8 & 962.4 & 336.4",
233+
".*924.2\\\\textendash424.6 & 740.8 & 104.2.*"
234234
)
235235
)
236236

@@ -252,16 +252,16 @@ test_that("cols_merge_range() works correctly", {
252252
expect_match(
253253
as_latex(tbl_latex) %>% as.character(),
254254
paste0(
255-
".*767.6–928.1 & 382.0–674.5",
256-
".*403.3–461.5 & 15.1–242.8",
257-
".*686.4–54.1 & 282.7–56.3",
258-
".*662.6–148.8 & 984.6–928.1",
259-
".*198.5–65.1 & 127.4–219.3",
260-
".*132.1–118.1 & 91.2–874.3",
261-
".*349.7–307.1 & 566.7–542.9",
262-
".*63.7–504.3 & 152.0–724.5",
263-
".*105.4–729.8 & 962.4–336.4",
264-
".*924.2–424.6 & 740.8–104.2.*"
255+
".*767.6\\\\textendash928.1 & 382.0\\\\textendash674.5",
256+
".*403.3\\\\textendash461.5 & 15.1\\\\textendash242.8",
257+
".*686.4\\\\textendash54.1 & 282.7\\\\textendash56.3",
258+
".*662.6\\\\textendash148.8 & 984.6\\\\textendash928.1",
259+
".*198.5\\\\textendash65.1 & 127.4\\\\textendash219.3",
260+
".*132.1\\\\textendash118.1 & 91.2\\\\textendash874.3",
261+
".*349.7\\\\textendash307.1 & 566.7\\\\textendash542.9",
262+
".*63.7\\\\textendash504.3 & 152.0\\\\textendash724.5",
263+
".*105.4\\\\textendash729.8 & 962.4\\\\textendash336.4",
264+
".*924.2\\\\textendash424.6 & 740.8\\\\textendash104.2.*"
265265
)
266266
)
267267
})

0 commit comments

Comments
 (0)