@@ -3000,20 +3000,6 @@ latex_special_chars <- c(
30003000# # Copied from https://github.com/phfaist/pylatexenc/blob/6dc2ce7fcd89b7cd1536c79c800f49f09535f5e9/pylatexenc/latexencode/_uni2latexmap.py
30013001
30023002latex_unicode_chars <- c(
3003- " \U 0022" = ' "' , # character ",
3004- " \U 0023" = ' \\ #' , # character #",
3005- " \U 0024" = ' \\ $' , # character $",
3006- " \U 0025" = ' \\ %' , # character %",
3007- " \U 0026" = ' \\ &' , # character &",
3008- " \U 003C" = ' \\ ensuremath{<}' , # <",
3009- " \U 003E" = ' \\ ensuremath{>}' , # >",
3010- " \U 005C" = ' \\ textbackslash' , # the \\ character itself",
3011- " \U 005E" = ' \\ textasciicircum' , # character ^",
3012- " \U 005F" = ' \\ _' , # character _",
3013- " \U 007B" = ' \\ {' , # character {",
3014- " \U 007D" = ' \\ }' , # character }",
3015- " \U 007E" = ' \\ textasciitilde' , # character ~",
3016- " \U 00A0" = ' ~' , # character NO-BREAK SPACE",
30173003 " \U 00A1" = ' \\ textexclamdown' , # character ¡",
30183004 " \U 00A2" = ' \\ textcent' , # character ¢",
30193005 " \U 00A3" = ' \\ textsterling' , # character £",
@@ -3026,7 +3012,6 @@ latex_unicode_chars <- c(
30263012 " \U 00AA" = ' \\ textordfeminine' , # character ª",
30273013 " \U 00AB" = ' \\ guillemotleft' , # character «",
30283014 " \U 00AC" = ' \\ textlnot' , # character ¬",
3029- " \U 00AD" = ' \\ -' , # SOFT HYPHEN []",
30303015 " \U 00AE" = ' \\ textregistered' , # character ®",
30313016 " \U 00AF" = ' \\ textasciimacron' , # character ¯",
30323017 " \U 00B0" = ' \\ textdegree' , # character °",
@@ -3260,8 +3245,6 @@ latex_unicode_chars <- c(
32603245 " \U 02DC" = ' \\ textasciitilde' ,
32613246 " \U 02DD" = ' \\ textacutedbl' , # 0x02DD",
32623247
3263- " \U 02BC" = " '" , # MODIFIER LETTER APOSTROPHE",
3264-
32653248 " \U 0307" = ' \\ ensuremath{\\ dot{}}' ,
32663249 " \U 0308" = ' \\ ensuremath{\\ ddot{}}' ,
32673250
@@ -3277,24 +3260,24 @@ latex_unicode_chars <- c(
32773260 " \U 0392" = ' B' , # GREEK CAPITAL LETTER BETA",
32783261 " \U 0393" = ' \\ ensuremath{\\ Gamma}' , # GREEK CAPITAL LETTER GAMMA",
32793262 " \U 0394" = ' \\ ensuremath{\\ Delta}' , # ...",
3280- " \U 0395" = ' E' ,
3281- " \U 0396" = ' Z' ,
3282- " \U 0397" = ' H' ,
3263+ # "\U0395"='E',
3264+ # "\U0396"='Z',
3265+ # "\U0397"='H',
32833266 " \U 0398" = ' \\ ensuremath{\\ Theta}' ,
3284- " \U 0399" = ' I' ,
3285- " \U 039A" = ' K' ,
3267+ # "\U0399"='I',
3268+ # "\U039A"='K',
32863269 " \U 039B" = ' \\ ensuremath{\\ Lambda}' ,
3287- " \U 039C" = ' M' ,
3288- " \U 039D" = ' N' ,
3270+ # "\U039C"='M',
3271+ # "\U039D"='N',
32893272 " \U 039E" = ' \\ ensuremath{\\ Xi}' ,
3290- " \U 039F" = ' O' ,
3273+ # "\U039F"='O',
32913274 " \U 03A0" = ' \\ ensuremath{\\ Pi}' ,
3292- " \U 03A1" = ' P' ,
3275+ # "\U03A1"='P',
32933276 " \U 03A3" = ' \\ ensuremath{\\ Sigma}' ,
3294- " \U 03A4" = ' T' ,
3277+ # "\U03A4"='T',
32953278 " \U 03A5" = ' \\ ensuremath{\\ Upsilon}' ,
32963279 " \U 03A6" = ' \\ ensuremath{\\ Phi}' ,
3297- " \U 03A7" = ' X' ,
3280+ # "\U03A7"='X',
32983281 " \U 03A8" = ' \\ ensuremath{\\ Psi}' ,
32993282 " \U 03A9" = ' \\ ensuremath{\\ Omega}' ,
33003283
@@ -3608,9 +3591,9 @@ latex_unicode_chars <- c(
36083591
36093592 " \U 200C" = ' \\ textcompwordmark' , # ZERO WIDTH NON-JOINER",
36103593
3611- " \U 2010" = ' -' , # HYPHEN",
3594+ # "\U2010"='-', # HYPHEN",
36123595 " \U 2011" = ' \\ nobreakdash-' , # NON-BREAKING HYPHEN, https://tex.stackexchange.com/a/330437/32188",
3613- " \U 2012" = ' -' , # FIGURE DASH",
3596+ # "\U2012"='-', # FIGURE DASH",
36143597 " \U 2013" = ' \\ textendash' , # 0x2013",
36153598 " \U 2014" = ' \\ textemdash' ,
36163599 " \U 2015" = ' \\ textemdash' , # HORIZONTAL BAR",
@@ -3624,14 +3607,14 @@ latex_unicode_chars <- c(
36243607 " \U 2020" = ' \\ textdagger' ,
36253608 " \U 2021" = ' \\ textdaggerdbl' ,
36263609 " \U 2022" = ' \\ textbullet' ,
3627- " \U 2024" = ' .' , # ONE DOT LEADER [�\u0080�]",
3628- " \U 2025" = ' ..' , # TWO DOT LEADER [�\u0080�]",
3610+ # "\U2024"='.', # ONE DOT LEADER [�\u0080�]",
3611+ # "\U2025"='..', # TWO DOT LEADER [�\u0080�]",
36293612 " \U 2026" = ' \\ textellipsis' ,
36303613 " \U 2030" = ' \\ textperthousand' ,
36313614 " \U 2031" = ' \\ textpertenthousand' ,
3632- " \U 2032: \" ' \ " , # PRIME [�\u 0080�]" ,
3633- " \U 2033: \" '' \ " , # DOUBLE PRIME [�\u 0080�]" ,
3634- " \U 2034: \" '''\ " , # TRIPLE PRIME [�\u 0080�]" ,
3615+ " \U 2032" = " ' " , # PRIME [�\u0080�]",
3616+ " \U 2033" = " '' " , # DOUBLE PRIME [�\u0080�]",
3617+ " \U 2034" = " '''" , # TRIPLE PRIME [�\u0080�]",
36353618 " \U 2035" = ' \\ ensuremath{\\ backprime}' , # REVERSED PRIME [�\u0080�]",
36363619 " \U 2039" = ' \\ guilsinglleft' ,
36373620 " \U 203A" = ' \\ guilsinglright' ,
@@ -3640,11 +3623,11 @@ latex_unicode_chars <- c(
36403623 " \U 2044" = ' \\ textfractionsolidus' ,
36413624 " \U 204E" = ' \\ textasteriskcentered' ,
36423625 " \U 2052" = ' \\ textdiscount' , # 0x2052",
3643- " \U 2057: \" ''''\ " , # QUADRUPLE PRIME [�\u 0081\u 0097]" ,
3626+ " \U 2057" = " ''''" , # QUADRUPLE PRIME [�\u0081\u0097]",
36443627
36453628 " \U 205F" = ' \\ hspace{0.22em}' , # MEDIUM MATHEMATICAL SPACE [�\u0081\u009f]",
36463629 " \U 2060" = ' \\ nolinebreak' , # WORD JOINER [�\u0081�]",
3647- " \U 2061" = ' ' , # FUNCTION APPLICATION",
3630+ # "\U2061"='', # FUNCTION APPLICATION",
36483631
36493632 " \U 20A1" = ' \\ textcolonmonetary' , # 0x20A1",
36503633 " \U 20A4" = ' \\ textlira' ,
@@ -3828,8 +3811,8 @@ latex_unicode_chars <- c(
38283811 # "\U2233"=NA, #ANTICLOCKWISE CONTOUR INTEGRAL
38293812 " \U 2234" = ' \\ ensuremath{\\ therefore}' ,
38303813 " \U 2235" = ' \\ ensuremath{\\ because}' ,
3831- " \U 2236" = ' \\ ensuremath{:}' ,
3832- " \U 2237" = ' \\ ensuremath{::}' ,
3814+ # "\U2236"='\\ensuremath{:}',
3815+ # "\U2237"='\\ensuremath{::}',
38333816
38343817 " \U 223A" = ' \\ ensuremath{\\ mathbin{{:}\\ !\\ !{-}\\ !\\ !{:}}}' , # GEOMETRIC PROPORTION [∺]",
38353818 " \U 223B" = ' \\ ensuremath{\\ homothetic}' , # HOMOTHETIC [∻]",
@@ -4143,13 +4126,19 @@ escape_latex <- function(text) {
41434126
41444127 regmatches(text [! na_text ], m ) <- escaped_chars
41454128
4146- m2 <- gregexpr(paste0(" [" ,paste0(names(latex_unicode_chars ), collapse = " " )," ]" ), text [! na_text ], perl = TRUE )
4129+ m2 <- gregexpr(paste0(" [" ,paste0(names(latex_unicode_chars ), collapse = " | " )," ]" ), text [! na_text ], perl = TRUE )
41474130
41484131 unicode_chars <- regmatches(text [! na_text ], m2 )
41494132
4133+ # browser()
4134+
41504135 latex_unicode <-
41514136 lapply(unicode_chars , function (x ) {
4152- latex_unicode_chars [x ]
4137+ new_var <- latex_unicode_chars [x ]
4138+ if (length(new_var ) > 0 ){
4139+ x [! is.na(new_var )] <- new_var [! is.na(new_var )]
4140+ }
4141+ x
41534142 })
41544143
41554144 regmatches(text [! na_text ], m2 ) <- latex_unicode
0 commit comments