1+ /* Soros interpreter (see numbertext.org)
2+ * 2018 (c) László Németh
3+ * License: LGPL/BSD dual license */
4+
15#include " Soros.hxx"
26
// NOTE(review): ITERATION_LIMIT is not referenced in the visible hunks;
// presumably it bounds the rewrite iterations in Soros::run -- confirm.
37#define ITERATION_LIMIT 250
// Private-use marker that delimits the three fields of the inner "separator
// function" call built from "[ ... $n ... ]" in replacement strings, and
// that the generated __numbertext__ separator rules match on.
8+ #define SEP L" \uE00A "
9+
// FIX is spliced in front of the backslash sequences (\d, \1, \2, \3) of the
// text substituted for "__numbertext__" in the Soros constructor.
// It is an extra backslash when HAVE_BOOST_REGEX_HPP is defined and empty
// otherwise.
// NOTE(review): presumably Boost's replacement-format parsing consumes one
// more level of backslash escaping than std::regex_replace -- confirm.
10+ #ifdef HAVE_BOOST_REGEX_HPP
11+ #define FIX L" \\ "
12+ #else
13+ #define FIX L" "
14+ #endif
415
// Characters that may be backslash-escaped in the program source
// (backslash, double quote, semicolon, hash); the constructor maps the
// escaped forms to placeholder characters via translate(source, m, c, ...).
516const std::wstring Soros::m = L" \\\" ;#" ;
// Metacharacters of the replacement-string syntax ($, parentheses, pipe,
// square brackets); their backslash-escaped forms are mapped to the c2
// placeholders via translate(s2, m2, c2, ...).
617const std::wstring Soros::m2 = L" $()|[]" ;
@@ -9,7 +20,7 @@ const std::wstring Soros::c2 = L"\uE004\uE005\uE006\uE007\uE008\uE009";
// Placeholder that the constructor turns back into an escaped backslash
// when building a pattern (replace(s, slash, ...)).
920const std::wstring Soros::slash = L" \uE000 " ;
// Placeholder checked by Soros::run at either end of a matched function call
// to force the begin/end flags to true.
// NOTE(review): presumably stands for the "|" boundary mark after the
// "$, (, ), | -> \uE000..\uE003" translation -- confirm against c.
1021const std::wstring Soros::pipe = L" \uE003 " ;
1122// pattern to recognize function calls in the replacement string
12- const std:: wregex Soros::func ( Soros::translate (
23+ const wregex Soros::func ( Soros::translate (
1324 L" (?:\\ |?(?:\\ $\\ ()+)?" // optional nested calls
1425 " (\\ |?\\ $\\ (([^\\ (\\ )]*)\\ )\\ |?)" // inner call (2 subgroups)
1526 " (?:\\ )+\\ |?)?" , // optional nested calls
@@ -30,64 +41,64 @@ Soros::Soros(std::wstring source, std::wstring filtered_lang):
3041{
3142 source = translate (source, m, c, L" \\ " ); // \\, \", \;, \# -> \uE000..\uE003
3243 // switch off all country-dependent lines, and switch on the requested ones
33- source = std:: regex_replace (source, std:: wregex (L" (^|[\n ;])([^\n ;#]*#[^\n ]*\\ [:[^\n :\\ ]]*:\\ ][^\n ]*)" ), L" $1#$2" );
44+ source = regex_replace (source, wregex (L" (^|[\n ;])([^\n ;#]*#[^\n ]*\\ [:[^\n :\\ ]]*:\\ ][^\n ]*)" ), L" $1#$2" );
3445 replace (filtered_lang, L" _" , L" -" );
35- source = std:: regex_replace (source, std:: wregex (L" (^|[\n ;])#([^\n ;#]*#[^\n ]*\\ [:" + filtered_lang + L" :\\ ][^\n ]*)" ), L" $1$2" );
36- source = std:: regex_replace (source, std:: wregex (L" (#[^\n ]*)?(\n |$)" ), L" ;" ); // remove comments
46+ source = regex_replace (source, wregex (L" (^|[\n ;])#([^\n ;#]*#[^\n ]*\\ [:" + filtered_lang + L" :\\ ][^\n ]*)" ), L" $1$2" );
47+ source = regex_replace (source, wregex (L" (#[^\n ]*)?(\n |$)" ), L" ;" ); // remove comments
3748 // __numbertext__ sets the place of left zero deletion rule
3849 if (source.find (L" __numbertext__" ) == std::wstring::npos)
3950 source.insert (0 , L" __numbertext__;" );
40- source = std:: regex_replace (source, std:: wregex (L" __numbertext__" ),
51+ source = regex_replace (source, wregex (L" __numbertext__" ),
4152 // default left zero deletion
42- L" \" ([a-z][-a-z]* )?0+(0|[1-9]\\ d*)\" $$(\\ 1\\ 2);"
53+ L" \" ([a-z][-a-z]* )?0+(0|[1-9]" FIX L" \\ d*)\" $$(" FIX L" \\ 1" FIX L" \\ 2);"
4354 // separator function
44- L" \"\uE00A (.*)\uE00A (.+)\uE00A (.*)\" \\ 1\\ 2\\ 3;"
55+ SEP L" (.*)" SEP L" (.+)" SEP L" (.*) " FIX L" \\ 1" FIX L" \\ 2" FIX L" \\ 3;"
4556 // no separation, if subcall returns with empty string
46- L" \"\uE00A .* \uE00A\uE00A .* \" " );
57+ SEP L" .* " SEP SEP L" .* " );
4758
48- std:: wregex p (L" ^\\ s*(\" [^\" ]*\" |[^\\ s]*)\\ s*(.*[^\\ s])?\\ s*$" );
49- std:: wregex macro (L" == *([^ ]*) *==" );
59+ wregex p (L" ^\\ s*(\" [^\" ]*\" |[^\\ s]*)\\ s*(.*[^\\ s])?\\ s*$" );
60+ wregex macro (L" == *([^ ]*) *==" );
5061 size_t pos = 0 ;
5162 size_t old_pos = 0 ;
52- std:: wregex quoteStart (L" ^\" " );
53- std:: wregex quoteEnd (L" \" $" );
63+ wregex quoteStart (L" ^\" " );
64+ wregex quoteEnd (L" \" $" );
5465 std::wstring smacro = L" " ;
5566 while ((pos = source.find (L" ;" , pos)) != std::wstring::npos) {
56- std:: wsmatch sp;
67+ wsmatch sp;
5768 std::wstring linOrig = source.substr (old_pos, pos - old_pos);
5869 // pattern extension after == macro ==:
5970 // foo bar -> "macro foo" bar
6071 // "foo bar" baz -> "macro foo bar" baz
6172 // "^foo bar" baz -> "^macro foo bar" baz
6273 std::wstring lin = linOrig;
63- if (smacro.length () > 0 && linOrig.length () > 0 && std:: regex_search (linOrig, sp, p))
74+ if (smacro.length () > 0 && linOrig.length () > 0 && regex_search (linOrig, sp, p))
6475 {
65- std::wstring s = std:: regex_replace (sp[1 ].str (), quoteStart, L" " );
66- s = std:: regex_replace (s, quoteEnd, L" " );
76+ std::wstring s = regex_replace (sp[1 ].str (), quoteStart, L" " );
77+ s = regex_replace (s, quoteEnd, L" " );
6778 std::wstring sEmpty = (s.length () == 0 ) ? L" " : L" " ;
6879 if (s[0 ] == L' ^' ) {
69- s = std:: regex_replace (s, std:: wregex (L" ^\\ ^" ), L" " );
80+ s = regex_replace (s, wregex (L" ^\\ ^" ), L" " );
7081 lin = L" \" ^" + smacro + sEmpty + s + L" \" " + sp[2 ].str ();
7182 } else
7283 lin = L" \" " + smacro + sEmpty + s + L" \" " + sp[2 ].str ();
7384 }
74- if (linOrig.length () > 0 && std:: regex_match (linOrig, sp, macro))
85+ if (linOrig.length () > 0 && regex_match (linOrig, sp, macro))
7586 {
7687 smacro = sp[1 ].str ();
7788 }
78- else if (lin.length () > 0 && std:: regex_search (lin, sp, p))
89+ else if (lin.length () > 0 && regex_search (lin, sp, p))
7990 {
80- std::wstring s = std:: regex_replace (sp[1 ].str (), quoteStart, L" " );
81- s = std:: regex_replace (s, quoteEnd, L" " );
91+ std::wstring s = regex_replace (sp[1 ].str (), quoteStart, L" " );
92+ s = regex_replace (s, quoteEnd, L" " );
8293 s = translate (s, c.substr (1 ), m.substr (1 ), L" " );
8394 replace (s, slash, L" \\\\ " ); // -> \\, ", ;, #
8495 begins.push_back (s[0 ] == L' ^' );
8596 ends.push_back (s[s.length ()-1 ] == L' $' );
86- s = L" ^" + std:: regex_replace (s, std:: wregex (L" ^\\ ^" ), L" " );
87- s = std:: regex_replace (s, std:: wregex (L" \\ $$" ), L" " ) + L" $" ;
97+ s = L" ^" + regex_replace (s, wregex (L" ^\\ ^" ), L" " );
98+ s = regex_replace (s, wregex (L" \\ $$" ), L" " ) + L" $" ;
8899 try
89100 {
90- patterns.push_back (std:: wregex (s));
101+ patterns.push_back (wregex (s));
91102 } catch (...)
92103 {
93104 std::wcout << L" Soros: bad regex in \" " << sp[1 ].str () << " \" " << std::endl;
@@ -96,26 +107,26 @@ Soros::Soros(std::wstring source, std::wstring filtered_lang):
96107 std::wstring s2 = L" " ;
97108 if (sp.size () > 1 )
98109 {
99- s2 = std:: regex_replace (sp[2 ].str (), quoteStart, L" " );
100- s2 = std:: regex_replace (s2, quoteEnd, L" " );
110+ s2 = regex_replace (sp[2 ].str (), quoteStart, L" " );
111+ s2 = regex_replace (s2, quoteEnd, L" " );
101112 }
102113 s2 = translate (s2, m2, c2, L" \\ " ); // \$, \(, \), \|, \[, \] -> \uE004..\uE009
103- // call inner separator: [ ... $1 ... ] -> $(\uE00A ... \uE00A$1\uE00A ... )
104- s2 = std:: regex_replace (s2, std:: wregex (L" ^\\ [[$](\\ d\\ d?|\\ ([^\\ )]+\\ ))" ),
105- L" $$(\uE00A\uE00A |$$$1\uE00A " ); // add "|" in terminating position
106- s2 = std:: regex_replace (s2, std:: wregex (L" \\ [([^$\\ [\\\\ ]*)[$](\\ d\\ d?|\\ ([^\\ )]+\\ ))" ),
107- L" $$(\uE00A $1 \uE00A $$$2\uE00A " );
108- s2 = std:: regex_replace (s2, std:: wregex (L" \uE00A\\ ]$" ), L" |\uE00A )" ); // add "|" in terminating position
114+ // call inner separator: " [ ... $1 ... ]" -> "$(" SEP " ... " SEP "$1" SEP " ... )"
115+ s2 = regex_replace (s2, wregex (L" ^\\ [[$](\\ d\\ d?|\\ ([^\\ )]+\\ ))" ),
116+ L" $$(" SEP SEP L" |$$$1" SEP ); // add "|" in terminating position
117+ s2 = regex_replace (s2, wregex (L" \\ [([^$\\ [\\\\ ]*)[$](\\ d\\ d?|\\ ([^\\ )]+\\ ))" ),
118+ L" $$(" SEP L" $1 " SEP L" $$$2" SEP );
119+ s2 = regex_replace (s2, wregex (SEP L" \\ ]$" ), L" |" SEP L" )" ); // add "|" in terminating position
109120 s2 = translate (s2, L" ]" , L" )" , L" " );
110- s2 = std:: regex_replace (s2, std:: wregex (L" ([$]\\ d|\\ ))\\ |[$]" ), L" $1||$$" ); // $()|$() -> $()||$()
121+ s2 = regex_replace (s2, wregex (L" ([$]\\ d|\\ ))\\ |[$]" ), L" $1||$$" ); // $()|$() -> $()||$()
111122 s2 = translate (s2, c, m, L" " ); // \uE000..\uE003-> \, ", ;, #
112123 s2 = translate (s2, m2.substr (0 , 4 ), c, L" " ); // $, (, ), | -> \uE000..\uE003
113124 s2 = translate (s2, c2, m2, L" " ); // \uE004..\uE007 -> $, (, ), |
114- s2 = std:: regex_replace (s2, std:: wregex (L" [$]" ), L" \\ $$" ); // $ -> \$
115- s2 = std:: regex_replace (s2, std:: wregex (L" \uE000 (\\ d)" ), L" \uE000\uE001 $$$1\uE002 " ); // $n -> $(\n)
116- s2 = std:: regex_replace (s2, std:: wregex (L" \\\\ ([1-9])" ), L" $$0$1" ); // \[n] -> $[n]
117- s2 = std:: regex_replace (s2, std:: wregex (L" \\\\ 0" ), L" $$0" ); // \0 -> $0
118- s2 = std:: regex_replace (s2, std:: wregex (L" \\\\ n" ), L" \n " ); // \n -> [new line]
125+ s2 = regex_replace (s2, wregex (L" [$]" ), L" \\ $$" ); // $ -> \$
126+ s2 = regex_replace (s2, wregex (L" \uE000 (\\ d)" ), L" \uE000\uE001 $$$1\uE002 " ); // $n -> $(\n)
127+ s2 = regex_replace (s2, wregex (L" \\\\ ([1-9])" ), L" $$0$1" ); // \[n] -> $[n]
128+ s2 = regex_replace (s2, wregex (L" \\\\ 0" ), L" $$0" ); // \0 -> $0
129+ s2 = regex_replace (s2, wregex (L" \\\\ n" ), L" \n " ); // \n -> [new line]
119130 values.push_back (s2);
120131 }
121132 pos++;
@@ -145,27 +156,27 @@ void Soros::run(std::wstring& input, int& level, bool begin, bool end)
145156 {
146157 if ((!begin && begins[i]) || (!end && ends[i]))
147158 continue ;
148- if (!std:: regex_match (input, patterns[i]))
159+ if (!regex_match (input, patterns[i]))
149160 continue ;
150- input = std:: regex_replace (input, patterns[i], values[i]);
151- std:: wsmatch n;
152- while (std:: regex_search (input, n, func))
161+ input = regex_replace (input, patterns[i], values[i]);
162+ wsmatch n;
163+ while (regex_search (input, n, func))
153164 {
154165 bool b = false ;
155166 bool e = false ;
156167 if (n[1 ].str ()[0 ] == pipe[0 ] || n[0 ].str ()[0 ] == pipe[0 ])
157168 {
158169 b = true ;
159170 }
160- else if (n.position (0 ) == 0 )
171+ else if (n.position () == 0 )
161172 {
162173 b = begin;
163174 }
164175 if (n[1 ].str ().back () == pipe[0 ] || n[0 ].str ().back () == pipe[0 ])
165176 {
166177 e = true ;
167178 }
168- else if (n.position (0 ) + n[0 ].length () == (signed ) input.length ())
179+ else if (n.position () + n[0 ].length () == (signed ) input.length ())
169180 {
170181 e = end;
171182 }
@@ -193,5 +204,3 @@ std::wstring Soros::translate(
193204 replace (s, delim + ch, chars2.substr (i++, 1 ));
194205 return s;
195206}
196-
197-
0 commit comments