Skip to content

Commit 24b378d

Browse files
committed
optional boost::regex support
1 parent aee2aac commit 24b378d

File tree

7 files changed

+152
-100
lines changed

7 files changed

+152
-100
lines changed

configure.ac

Lines changed: 10 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -25,33 +25,28 @@ dnl Checks for headers
2525
AC_HEADER_STDC
2626
AC_CHECK_HEADERS([inttypes.h stdint.h string.h])
2727

28-
AC_DEFINE([BOOST_ALL_NO_LIB], [], [Disable boost autolink.])
2928

3029
dnl Checks for typedefs, structures, and compiler characteristics.
3130
AC_LANG([C++])
3231
CXXFLAGS='-Wall -std=c++11'
3332

34-
AC_CHECK_HEADER([codecvt])
33+
AC_CHECK_HEADERS([codecvt regex])
3534

36-
AS_IF([test "x$ac_cv_header_codecvt" == xyes], [
37-
AS_IF([test "x$with_boost" != x], [
38-
BOOST_REQUIRE
39-
BOOST_LOCALE
40-
])
41-
AS_IF([test "x$with_boost" == x], [
42-
AC_DEFINE([HAVE_CODECVT], [1], [Define if codecvt is enabled, but not --with-boost.])
43-
])
35+
AS_IF([test "x$with_boost" != x], [
36+
AC_DEFINE([BOOST_ALL_NO_LIB], [], [Disable boost autolink.])
37+
BOOST_REQUIRE
38+
BOOST_LOCALE
39+
BOOST_REGEX
4440
])
4541

46-
AS_IF([test "x$ac_cv_header_codecvt" != "xyes"],
42+
AS_IF([test "x$ac_cv_header_codecvt" != xyes -o "x$ac_cv_header_regex" != xyes],
4743
[
48-
BOOST_REQUIRE
49-
BOOST_LOCALE
44+
AS_IF([test "x$with_boost" == x], [
45+
AC_MSG_ERROR([Missing std::codecvt or std::regex support, use GCC 4.9 (or greater) or --with-boost.])
46+
])
5047
])
5148

5249
dnl Checks for functions
53-
AC_FUNC_VPRINTF
54-
AC_CHECK_FUNCS([strdup strpbrk])
5550

5651
# ================
5752
# Check for cflags

src/Makefile.am

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
AM_CXXFLAGS = -D_THREAD_SAFE -D_GNU_SOURCE -DVERBOSE
22

3+
LIBS += $(BOOST_REGEX_LIBS)
4+
35
libnumbertext_1_0_includedir = $(includedir)/libnumbertext
46
libnumbertext_1_0_include_HEADERS = \
57
Soros.hxx Numbertext.hxx \

src/Numbertext.cxx

Lines changed: 20 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,21 @@
1+
/* Soros interpreter (see numbertext.org)
2+
* 2018 (c) László Németh
3+
* License: LGPL/BSD dual license */
4+
15
#include <sstream>
26
#include <fstream>
3-
4-
#ifdef _MSC_VER
5-
#define HAVE_CODECVT
6-
#else
77
#include "config.h"
8-
#endif
98

10-
#ifdef HAVE_CODECVT
11-
#include <codecvt>
9+
#ifdef HAVE_BOOST_REGEX_HPP
10+
#include <boost/locale/encoding_utf.hpp>
11+
#include <boost/regex.hpp>
12+
using namespace boost;
1213
#else
13-
#include <boost/locale/encoding_utf.hpp>
14-
using boost::locale::conv::utf_to_utf;
14+
#include <codecvt>
15+
#include <regex>
16+
using namespace std;
1517
#endif
18+
1619
#include <locale>
1720
#include "Numbertext.hxx"
1821

@@ -45,13 +48,13 @@ bool Numbertext::load(std::string lang, std::string filename)
4548
{
4649
std::wstring module;
4750
if (filename.length() == 0)
48-
filename = prefix + std::regex_replace(lang,
49-
std::regex("-"), "_") + SOROS_EXT;
51+
filename = prefix + regex_replace(lang,
52+
regex("-"), "_") + SOROS_EXT;
5053
if (!readfile(filename, module))
5154
{
5255
// try to load without the country code
53-
filename = std::regex_replace(filename,
54-
std::regex("[-_].." SOROS_EXT "$"), SOROS_EXT);
56+
filename = regex_replace(filename,
57+
regex("[-_].." SOROS_EXT "$"), SOROS_EXT);
5558
if (!readfile(filename, module))
5659
return false;
5760
}
@@ -89,23 +92,22 @@ std::string Numbertext::numbertext(int number, std::string lang)
8992

9093
std::wstring Numbertext::string2wstring(const std::string& st)
9194
{
92-
#ifdef HAVE_CODECVT
95+
#ifndef HAVE_BOOST_REGEX_HPP
9396
typedef std::codecvt_utf8<wchar_t> convert_type;
9497
std::wstring_convert<convert_type, wchar_t> converter;
9598
return converter.from_bytes( st );
9699
#else
97-
return utf_to_utf<wchar_t>(st.c_str(), st.c_str() + st.size());
100+
return ::locale::conv::utf_to_utf<wchar_t>(st.c_str(), st.c_str() + st.size());
98101
#endif
99102
}
100103

101104
std::string Numbertext::wstring2string(const std::wstring& st)
102105
{
103-
#ifdef HAVE_CODECVT
106+
#ifndef HAVE_BOOST_REGEX_HPP
104107
typedef std::codecvt_utf8<wchar_t> convert_type;
105108
std::wstring_convert<convert_type, wchar_t> converter;
106109
return converter.to_bytes( st );
107110
#else
108-
return utf_to_utf<char>(st.c_str(), st.c_str() + st.size());
111+
return ::locale::conv::utf_to_utf<char>(st.c_str(), st.c_str() + st.size());
109112
#endif
110113
}
111-

src/Numbertext.hxx

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
/* Soros interpreter (see numbertext.org)
2+
* 2018 (c) László Németh
3+
* License: LGPL/BSD dual license */
4+
15
#ifndef NUMBERTEXT_HXX
26
#define NUMBERTEXT_HXX
37

@@ -16,6 +20,7 @@ public:
1620
std::string numbertext(int number, std::string lang);
1721
static std::wstring string2wstring(const std::string& s);
1822
static std::string wstring2string(const std::wstring& s);
23+
1924
private:
2025
std::string prefix;
2126
std::unordered_map<std::string, Soros> modules;

src/Soros.cxx

Lines changed: 55 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,17 @@
1+
/* Soros interpreter (see numbertext.org)
2+
* 2018 (c) László Németh
3+
* License: LGPL/BSD dual license */
4+
15
#include "Soros.hxx"
26

37
#define ITERATION_LIMIT 250
8+
#define SEP L"\uE00A"
9+
10+
#ifdef HAVE_BOOST_REGEX_HPP
11+
#define FIX L"\\"
12+
#else
13+
#define FIX L""
14+
#endif
415

516
const std::wstring Soros::m = L"\\\";#";
617
const std::wstring Soros::m2 = L"$()|[]";
@@ -9,7 +20,7 @@ const std::wstring Soros::c2 = L"\uE004\uE005\uE006\uE007\uE008\uE009";
920
const std::wstring Soros::slash = L"\uE000";
1021
const std::wstring Soros::pipe = L"\uE003";
1122
// pattern to recognize function calls in the replacement string
12-
const std::wregex Soros::func ( Soros::translate (
23+
const wregex Soros::func ( Soros::translate (
1324
L"(?:\\|?(?:\\$\\()+)?" // optional nested calls
1425
"(\\|?\\$\\(([^\\(\\)]*)\\)\\|?)" // inner call (2 subgroups)
1526
"(?:\\)+\\|?)?", // optional nested calls
@@ -30,64 +41,64 @@ Soros::Soros(std::wstring source, std::wstring filtered_lang):
3041
{
3142
source = translate(source, m, c, L"\\"); // \\, \", \;, \# -> \uE000..\uE003
3243
// switch off all country-dependent lines, and switch on the requested ones
33-
source = std::regex_replace(source, std::wregex(L"(^|[\n;])([^\n;#]*#[^\n]*\\[:[^\n:\\]]*:\\][^\n]*)"), L"$1#$2");
44+
source = regex_replace(source, wregex(L"(^|[\n;])([^\n;#]*#[^\n]*\\[:[^\n:\\]]*:\\][^\n]*)"), L"$1#$2");
3445
replace(filtered_lang, L"_", L"-");
35-
source = std::regex_replace(source, std::wregex(L"(^|[\n;])#([^\n;#]*#[^\n]*\\[:" + filtered_lang + L":\\][^\n]*)"), L"$1$2");
36-
source = std::regex_replace(source, std::wregex(L"(#[^\n]*)?(\n|$)"), L";"); // remove comments
46+
source = regex_replace(source, wregex(L"(^|[\n;])#([^\n;#]*#[^\n]*\\[:" + filtered_lang + L":\\][^\n]*)"), L"$1$2");
47+
source = regex_replace(source, wregex(L"(#[^\n]*)?(\n|$)"), L";"); // remove comments
3748
// __numbertext__ sets the place of left zero deletion rule
3849
if (source.find(L"__numbertext__") == std::wstring::npos)
3950
source.insert(0, L"__numbertext__;");
40-
source = std::regex_replace(source, std::wregex(L"__numbertext__"),
51+
source = regex_replace(source, wregex(L"__numbertext__"),
4152
// default left zero deletion
42-
L"\"([a-z][-a-z]* )?0+(0|[1-9]\\d*)\" $$(\\1\\2);"
53+
L"\"([a-z][-a-z]* )?0+(0|[1-9]" FIX L"\\d*)\" $$(" FIX L"\\1" FIX L"\\2);"
4354
// separator function
44-
L"\"\uE00A(.*)\uE00A(.+)\uE00A(.*)\" \\1\\2\\3;"
55+
SEP L"(.*)" SEP L"(.+)" SEP L"(.*) " FIX L"\\1" FIX L"\\2" FIX L"\\3;"
4556
// no separation, if subcall returns with empty string
46-
L"\"\uE00A.*\uE00A\uE00A.*\"");
57+
SEP L".*" SEP SEP L".*");
4758

48-
std::wregex p(L"^\\s*(\"[^\"]*\"|[^\\s]*)\\s*(.*[^\\s])?\\s*$");
49-
std::wregex macro(L"== *([^ ]*) *==");
59+
wregex p(L"^\\s*(\"[^\"]*\"|[^\\s]*)\\s*(.*[^\\s])?\\s*$");
60+
wregex macro(L"== *([^ ]*) *==");
5061
size_t pos = 0;
5162
size_t old_pos = 0;
52-
std::wregex quoteStart(L"^\"");
53-
std::wregex quoteEnd(L"\"$");
63+
wregex quoteStart(L"^\"");
64+
wregex quoteEnd(L"\"$");
5465
std::wstring smacro = L"";
5566
while ((pos = source.find(L";", pos)) != std::wstring::npos) {
56-
std::wsmatch sp;
67+
wsmatch sp;
5768
std::wstring linOrig = source.substr(old_pos, pos - old_pos);
5869
// pattern extension after == macro ==:
5970
// foo bar -> "macro foo" bar
6071
// "foo bar" baz -> "macro foo bar" baz
6172
// "^foo bar" baz -> "^macro foo bar" baz
6273
std::wstring lin = linOrig;
63-
if (smacro.length() > 0 && linOrig.length() > 0 && std::regex_search(linOrig, sp, p))
74+
if (smacro.length() > 0 && linOrig.length() > 0 && regex_search(linOrig, sp, p))
6475
{
65-
std::wstring s = std::regex_replace(sp[1].str(), quoteStart, L"");
66-
s = std::regex_replace(s, quoteEnd, L"");
76+
std::wstring s = regex_replace(sp[1].str(), quoteStart, L"");
77+
s = regex_replace(s, quoteEnd, L"");
6778
std::wstring sEmpty = (s.length() == 0) ? L"" : L" ";
6879
if (s[0] == L'^') {
69-
s = std::regex_replace(s, std::wregex(L"^\\^"), L"");
80+
s = regex_replace(s, wregex(L"^\\^"), L"");
7081
lin = L"\"^" + smacro + sEmpty + s + L"\" " + sp[2].str();
7182
} else
7283
lin = L"\"" + smacro + sEmpty + s + L"\" " + sp[2].str();
7384
}
74-
if (linOrig.length() > 0 && std::regex_match(linOrig, sp, macro))
85+
if (linOrig.length() > 0 && regex_match(linOrig, sp, macro))
7586
{
7687
smacro = sp[1].str();
7788
}
78-
else if (lin.length() > 0 && std::regex_search(lin, sp, p))
89+
else if (lin.length() > 0 && regex_search(lin, sp, p))
7990
{
80-
std::wstring s = std::regex_replace(sp[1].str(), quoteStart, L"");
81-
s = std::regex_replace(s, quoteEnd, L"");
91+
std::wstring s = regex_replace(sp[1].str(), quoteStart, L"");
92+
s = regex_replace(s, quoteEnd, L"");
8293
s = translate(s, c.substr(1), m.substr(1), L"");
8394
replace(s, slash, L"\\\\"); // -> \\, ", ;, #
8495
begins.push_back(s[0] == L'^');
8596
ends.push_back(s[s.length()-1] == L'$');
86-
s = L"^" + std::regex_replace(s, std::wregex(L"^\\^"), L"");
87-
s = std::regex_replace(s, std::wregex(L"\\$$"), L"") + L"$";
97+
s = L"^" + regex_replace(s, wregex(L"^\\^"), L"");
98+
s = regex_replace(s, wregex(L"\\$$"), L"") + L"$";
8899
try
89100
{
90-
patterns.push_back(std::wregex(s));
101+
patterns.push_back(wregex(s));
91102
} catch (...)
92103
{
93104
std::wcout << L"Soros: bad regex in \"" << sp[1].str() << "\"" << std::endl;
@@ -96,26 +107,26 @@ Soros::Soros(std::wstring source, std::wstring filtered_lang):
96107
std::wstring s2 = L"";
97108
if (sp.size() > 1)
98109
{
99-
s2 = std::regex_replace(sp[2].str(), quoteStart, L"");
100-
s2 = std::regex_replace(s2, quoteEnd, L"");
110+
s2 = regex_replace(sp[2].str(), quoteStart, L"");
111+
s2 = regex_replace(s2, quoteEnd, L"");
101112
}
102113
s2 = translate(s2, m2, c2, L"\\"); // \$, \(, \), \|, \[, \] -> \uE004..\uE009
103-
// call inner separator: [ ... $1 ... ] -> $(\uE00A ... \uE00A$1\uE00A ... )
104-
s2 = std::regex_replace(s2, std::wregex(L"^\\[[$](\\d\\d?|\\([^\\)]+\\))"),
105-
L"$$(\uE00A\uE00A|$$$1\uE00A"); // add "|" in terminating position
106-
s2 = std::regex_replace(s2, std::wregex(L"\\[([^$\\[\\\\]*)[$](\\d\\d?|\\([^\\)]+\\))"),
107-
L"$$(\uE00A$1\uE00A$$$2\uE00A");
108-
s2 = std::regex_replace(s2, std::wregex(L"\uE00A\\]$"), L"|\uE00A)"); // add "|" in terminating position
114+
// call inner separator: "[ ... $1 ... ]" -> "$(" SEP " ... " SEP "$1" SEP "... )"
115+
s2 = regex_replace(s2, wregex(L"^\\[[$](\\d\\d?|\\([^\\)]+\\))"),
116+
L"$$(" SEP SEP L"|$$$1" SEP); // add "|" in terminating position
117+
s2 = regex_replace(s2, wregex(L"\\[([^$\\[\\\\]*)[$](\\d\\d?|\\([^\\)]+\\))"),
118+
L"$$(" SEP L"$1" SEP L"$$$2" SEP);
119+
s2 = regex_replace(s2, wregex(SEP L"\\]$"), L"|" SEP L")"); // add "|" in terminating position
109120
s2 = translate(s2, L"]", L")", L"");
110-
s2 = std::regex_replace(s2, std::wregex(L"([$]\\d|\\))\\|[$]"), L"$1||$$"); // $()|$() -> $()||$()
121+
s2 = regex_replace(s2, wregex(L"([$]\\d|\\))\\|[$]"), L"$1||$$"); // $()|$() -> $()||$()
111122
s2 = translate(s2, c, m, L""); // \uE000..\uE003-> \, ", ;, #
112123
s2 = translate(s2, m2.substr(0, 4), c, L""); // $, (, ), | -> \uE000..\uE003
113124
s2 = translate(s2, c2, m2, L""); // \uE004..\uE007 -> $, (, ), |
114-
s2 = std::regex_replace(s2, std::wregex(L"[$]"), L"\\$$"); // $ -> \$
115-
s2 = std::regex_replace(s2, std::wregex(L"\uE000(\\d)"), L"\uE000\uE001$$$1\uE002"); // $n -> $(\n)
116-
s2 = std::regex_replace(s2, std::wregex(L"\\\\([1-9])"), L"$$0$1"); // \[n] -> $[n]
117-
s2 = std::regex_replace(s2, std::wregex(L"\\\\0"), L"$$0"); // \0 -> $0
118-
s2 = std::regex_replace(s2, std::wregex(L"\\\\n"), L"\n"); // \n -> [new line]
125+
s2 = regex_replace(s2, wregex(L"[$]"), L"\\$$"); // $ -> \$
126+
s2 = regex_replace(s2, wregex(L"\uE000(\\d)"), L"\uE000\uE001$$$1\uE002"); // $n -> $(\n)
127+
s2 = regex_replace(s2, wregex(L"\\\\([1-9])"), L"$$0$1"); // \[n] -> $[n]
128+
s2 = regex_replace(s2, wregex(L"\\\\0"), L"$$0"); // \0 -> $0
129+
s2 = regex_replace(s2, wregex(L"\\\\n"), L"\n"); // \n -> [new line]
119130
values.push_back(s2);
120131
}
121132
pos++;
@@ -145,27 +156,27 @@ void Soros::run(std::wstring& input, int& level, bool begin, bool end)
145156
{
146157
if ((!begin && begins[i]) || (!end && ends[i]))
147158
continue;
148-
if (!std::regex_match(input, patterns[i]))
159+
if (!regex_match(input, patterns[i]))
149160
continue;
150-
input = std::regex_replace(input, patterns[i], values[i]);
151-
std::wsmatch n;
152-
while (std::regex_search(input, n, func))
161+
input = regex_replace(input, patterns[i], values[i]);
162+
wsmatch n;
163+
while (regex_search(input, n, func))
153164
{
154165
bool b = false;
155166
bool e = false;
156167
if (n[1].str()[0] == pipe[0] || n[0].str()[0] == pipe[0])
157168
{
158169
b = true;
159170
}
160-
else if (n.position(0) == 0)
171+
else if (n.position() == 0)
161172
{
162173
b = begin;
163174
}
164175
if (n[1].str().back() == pipe[0] || n[0].str().back() == pipe[0])
165176
{
166177
e = true;
167178
}
168-
else if (n.position(0) + n[0].length() == (signed) input.length())
179+
else if (n.position() + n[0].length() == (signed) input.length())
169180
{
170181
e = end;
171182
}
@@ -193,5 +204,3 @@ std::wstring Soros::translate(
193204
replace(s, delim + ch, chars2.substr(i++, 1));
194205
return s;
195206
}
196-
197-

src/Soros.hxx

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,23 @@
1+
/* Soros interpreter (see numbertext.org)
2+
* 2018 (c) László Németh
3+
* License: LGPL/BSD dual license */
4+
15
#ifndef SOROS_HXX_
26
#define SOROS_HXX_
37

48
#include <iostream>
59
#include <iterator>
610
#include <string>
7-
#include <regex>
11+
12+
#include "config.h"
13+
14+
#ifdef HAVE_BOOST_REGEX_HPP
15+
#include <boost/regex.hpp>
16+
using namespace boost;
17+
#else
18+
#include <regex>
19+
using namespace std;
20+
#endif
821

922
class Soros {
1023

@@ -18,13 +31,13 @@ private:
1831
static void replace(std::wstring& s, const std::wstring& search,
1932
const std::wstring& replace);
2033

21-
std::vector<std::wregex> patterns;
34+
std::vector<wregex> patterns;
2235
std::vector<std::wstring> values;
2336
std::vector<bool> begins;
2437
std::vector<bool> ends;
2538

2639
static const std::wstring m, m2, c, c2, slash, pipe;
27-
static const std::wregex func;
40+
static const wregex func;
2841
};
2942

3043
#endif

0 commit comments

Comments
 (0)