Skip to content

Commit 62119e5

Browse files
committed
Move data into databuffer object.
This simplifies the code a bit and abstracts away low-level data access.
1 parent b281d3f commit 62119e5

File tree

3 files changed

+125
-99
lines changed

3 files changed

+125
-99
lines changed

Makefile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ config.mk: config.mk.in
5555

5656
libchecktestdata.o: config.mk
5757
libchecktestdata.o: $(PARSER_GEN)
58-
libchecktestdata.o: %.o: %.cc %.hpp parser.h
58+
libchecktestdata.o: %.o: %.cc %.hpp databuffer.hpp parser.h
5959

6060
checktestdata: CPPFLAGS += $(BOOST_CPPFLAGS)
6161
checktestdata: LDFLAGS += $(BOOST_LDFLAGS) $(STATIC_LINK_START) $(LIBGMPXX) $(STATIC_LINK_END)
@@ -106,7 +106,7 @@ coverage:
106106
$(MAKE) clean
107107
$(MAKE) CXXFLAGS='$(COVERAGE_CXXFLAGS)'
108108
$(MAKE) check
109-
gcov checktestdata.cc libchecktestdata.cc libchecktestdata.hpp
109+
gcov checktestdata.cc libchecktestdata.cc libchecktestdata.hpp databuffer.hpp
110110

111111
coverage-clean:
112112
rm -f *.gcda *.gcno *.gcov coverage*.html

databuffer.hpp

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
/*
2+
Libchecktestdata -- check testdata according to specification.
3+
4+
It's licensed under the 2-clause BSD license, see the file COPYING.
5+
6+
Input data buffer, wraps a string.
7+
*/
8+
9+
#include <string>
10+
#include <cctype>
11+
12+
namespace checktestdata {

/*
 * Returns non-zero iff c is a whitespace character other than newline.
 *
 * Declared inline because this header defines it: without inline,
 * including the header from more than one translation unit would give
 * multiple-definition link errors. The argument is converted to
 * unsigned char first, since passing a negative char value to
 * isspace() is undefined behaviour.
 */
inline int isspace_notnewline(char c)
{
	return c!='\n' && std::isspace(static_cast<unsigned char>(c));
}

/*
 * Input data buffer: wraps a string together with a read cursor and
 * keeps track of the cursor's line and in-line character position.
 *
 * Invariant: the cursor never moves past the end of the data, so
 * pos() <= size() always holds.
 */
class databuffer {
private:
	std::string data;
	// Absolute offset, zero-based line number and zero-based offset
	// within the current line of the read cursor.
	size_t _pos, _line, _lpos;

public:
	// An empty buffer; the cursor starts at the (empty) beginning.
	databuffer(): _pos(0), _line(0), _lpos(0) {}
	// A buffer over a copy of _data with the cursor at the start.
	databuffer(std::string _data): data(_data), _pos(0), _line(0), _lpos(0) {}

	// True when no more characters can be read.
	bool eof() const { return _pos >= data.size(); }

	// Total number of characters in the buffer.
	size_t size() const { return data.size(); }

	size_t pos()  const { return _pos; }
	size_t line() const { return _line; }
	size_t lpos() const { return _lpos; }

	// Returns up to 'length' characters starting at the cursor, without
	// moving it. Fewer (possibly zero) characters are returned near EOF.
	std::string next(size_t length=1) const
	{
		if ( eof() ) return std::string();
		// substr() itself clamps the count to the available characters,
		// which also avoids overflow in _pos+length for huge lengths.
		return data.substr(_pos,length);
	}

	// Returns up to 'length' characters directly before the cursor,
	// without moving it.
	std::string prev(size_t length=1) const
	{
		const size_t count = ( length<_pos ? length : _pos );
		return data.substr(_pos-count,count);
	}

	// Returns the character 'ahead' positions from the cursor without
	// moving it, or char() when that position is outside the data.
	// Callers use peek(-1) to inspect the previously read character;
	// this relies on unsigned wraparound of _pos+ahead, and yields
	// char() when there is no previous character.
	char peek(size_t ahead=0) const
	{
		const size_t idx = _pos+ahead;
		if ( idx>=data.size() ) return char();
		return data[idx];
	}

	// Reads one character and advances the cursor, updating line and
	// in-line position. At EOF nothing is consumed and char() is
	// returned, so the cursor stays valid for next()/prev().
	char readchar()
	{
		if ( eof() ) return char();

		const char c = data[_pos++];
		if ( c=='\n' ) {
			_line++;
			_lpos = 0;
		} else {
			_lpos++;
		}
		return c;
	}

	// Skips all whitespace except newlines at the cursor. Newlines are
	// never consumed here, so only the in-line position changes.
	void readwhitespace()
	{
		while ( !eof() && isspace_notnewline(data[_pos]) ) {
			_pos++;
			_lpos++;
		}
	}
};

} // namespace checktestdata

libchecktestdata.cc

Lines changed: 53 additions & 97 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030

3131
#include "parser.h"
3232
#include "libchecktestdata.hpp"
33+
#include "databuffer.hpp"
3334

3435
using namespace std;
3536

@@ -42,12 +43,12 @@ class generate_exception {};
4243
const int display_before_error = 65;
4344
const int display_after_error = 50;
4445

45-
size_t prognr, datanr, linenr, charnr, extra_ws;
46+
size_t prognr;
4647
command currcmd;
4748

4849
gmp_randclass gmp_rnd(gmp_randinit_default);
4950

50-
string data;
51+
databuffer data;
5152
vector<command> program;
5253

5354
// This stores array-type variables like x[i,j] as string "x" and
@@ -149,7 +150,7 @@ void readtestdata(istream &in)
149150
exit(exit_failure);
150151
}
151152

152-
data = ss.str();
153+
data = databuffer(ss.str());
153154
}
154155

155156
void error(string msg = string())
@@ -159,17 +160,14 @@ void error(string msg = string())
159160
throw generate_exception();
160161
}
161162

162-
size_t fr = max(0,int(datanr)-display_before_error);
163-
size_t to = min(data.size(),datanr+display_after_error);
164-
165-
debug("error at datanr = %d, %d - %d\n",(int)datanr,(int)fr,(int)to);
163+
debug("error at datanr = %d\n",(int)data.pos());
166164

167165
if ( !quiet ) {
168-
cerr << data.substr(fr,datanr-fr) << endl;
169-
cerr << string(min(charnr,(size_t)display_before_error),' ') << '^';
170-
cerr << data.substr(datanr,to-datanr) << endl << endl;
166+
cerr << data.prev(display_before_error) << endl;
167+
cerr << string(min(data.lpos(),(size_t)display_before_error),' ') << '^';
168+
cerr << data.next(display_after_error) << endl << endl;
171169

172-
cerr << "ERROR: line " << linenr+1 << " character " << charnr+1;
170+
cerr << "ERROR: line " << data.line()+1 << " character " << data.lpos()+1;
173171
cerr << " of testdata doesn't match " << currcmd;
174172
if ( msg.length()>0 ) cerr << ": " << msg;
175173
cerr << endl << endl;
@@ -589,12 +587,12 @@ bool dotest(const test& t)
589587
case 'E': if ( gendata ) {
590588
return (random() % 10 < 3);
591589
} else {
592-
return datanr>=data.size();
590+
return data.eof();
593591
}
594592
case 'M': if ( gendata ) {
595593
return (random() % 2 == 0);
596594
} else {
597-
return datanr<data.size() && t.args[0].val.find(data[datanr])!=string::npos;
595+
return !data.eof() && t.args[0].val.find(data.next())!=string::npos;
598596
}
599597
case 'U': return unique(t.args);
600598
case 'A': return inarray(t.args[0],t.args[1]);
@@ -605,46 +603,30 @@ bool dotest(const test& t)
605603
}
606604
}
607605

608-
int isspace_notnewline(char c) { return isspace(c) && c!='\n'; }
609-
610-
void readwhitespace()
611-
{
612-
while ( datanr<data.size() && isspace_notnewline(data[datanr]) ) {
613-
datanr++;
614-
charnr++;
615-
extra_ws++;
616-
}
617-
}
618-
619606
void checkspace()
620607
{
621-
if ( datanr>=data.size() ) error("end of file");
608+
if ( data.eof() ) error("end of file");
622609

623610
if ( whitespace_ok ) {
624611
// First check at least one space-like character
625-
if ( !isspace_notnewline(data[datanr++]) ) error();
626-
charnr++;
612+
if ( !isspace_notnewline(data.readchar()) ) error();
627613
// Then greedily read non-newline whitespace
628-
readwhitespace();
614+
data.readwhitespace();
629615
} else {
630-
if ( data[datanr++]!=' ' ) error();
631-
charnr++;
616+
if ( data.readchar()!=' ' ) error();
632617
}
633618
}
634619

635620
void checknewline()
636621
{
637622
// Trailing whitespace before newline
638-
if ( whitespace_ok ) readwhitespace();
623+
if ( whitespace_ok ) data.readwhitespace();
639624

640-
if ( datanr>=data.size() ) error("end of file");
641-
if ( data[datanr++]!='\n' ) error();
642-
linenr++;
643-
charnr=0;
625+
if ( data.eof() ) error("end of file");
626+
if ( data.readchar()!='\n' ) error();
644627

645628
// Leading whitespace after newline
646-
if ( whitespace_ok ) readwhitespace();
647-
629+
if ( whitespace_ok ) data.readwhitespace();
648630
}
649631

650632
#define MAX_MULT 10
@@ -834,8 +816,7 @@ void getdecrange(const command& cmd, int *decrange)
834816
void gentoken(command cmd, ostream &datastream)
835817
{
836818
currcmd = cmd;
837-
debug("generating token %s at %lu,%lu",
838-
cmd.name().c_str(),(unsigned long)linenr,(unsigned long)charnr);
819+
debug("generating token %s", cmd.name().c_str());
839820

840821
if ( cmd.name()=="SPACE" ) datastream << ' ';
841822

@@ -942,7 +923,7 @@ void checktoken(const command& cmd)
942923
{
943924
currcmd = cmd;
944925
debug("checking token %s at %lu,%lu",
945-
cmd.name().c_str(),(unsigned long)linenr,(unsigned long)charnr);
926+
cmd.name().c_str(),data.line(),data.lpos());
946927

947928
if ( cmd.name()=="SPACE" ) checkspace();
948929

@@ -952,12 +933,8 @@ void checktoken(const command& cmd)
952933
// Accepts format (0|-?[1-9][0-9]*), i.e. no leading zeros
953934
// and no '-0' accepted.
954935
string num;
955-
size_t len = 0;
956-
while ( datanr<data.size() &&
957-
(isdigit(data[datanr+len]) ||
958-
(num.size()==0 && data[datanr+len]=='-')) ) {
959-
num += data[datanr+len];
960-
len++;
936+
while ( isdigit(data.peek()) || (num.empty() && data.peek()=='-') ) {
937+
num += data.readchar();
961938
}
962939

963940
mpz_class lo = eval(cmd.args[0]);
@@ -977,9 +954,6 @@ void checktoken(const command& cmd)
977954

978955
if ( x<lo || x>hi ) error("value out of range");
979956
if ( cmd.nargs()>=3 ) setvar(cmd.args[2],value_t(x));
980-
981-
datanr += len;
982-
charnr += len;
983957
}
984958

985959
else if ( cmd.name()=="FLOAT" || cmd.name()=="FLOATP" ) {
@@ -1005,45 +979,33 @@ void checktoken(const command& cmd)
1005979
}
1006980
}
1007981

1008-
size_t start = datanr;
982+
size_t start = data.pos();
1009983
// Match optional minus sign:
1010-
if ( datanr<data.size() && data[datanr]=='-' ) { datanr++; charnr++; }
984+
if ( data.peek()=='-' ) data.readchar();
1011985
// Match base with optional decimal dot:
1012-
if ( datanr>=data.size() || !isdigit(data[datanr]) ) error("digit expected");
1013-
size_t digitpos = datanr, dotpos = string::npos;
1014-
while ( datanr<data.size() &&
1015-
(isdigit(data[datanr]) ||
1016-
(dotpos==string::npos && digitpos!=datanr && data[datanr]=='.')) ) {
1017-
if ( data[datanr]=='.' ) dotpos = datanr;
1018-
datanr++;
1019-
charnr++;
986+
if ( !isdigit(data.peek()) ) error("digit expected");
987+
size_t digitpos = data.pos(), dotpos = string::npos;
988+
char first_digit = data.peek();
989+
while ( (isdigit(data.peek()) ||
990+
(dotpos==string::npos && digitpos!=data.pos() && data.peek()=='.')) ) {
991+
if ( data.readchar()=='.' ) dotpos = data.pos()-1;
1020992
}
1021993
// Check that any dot is followed by digit:
1022-
if ( !isdigit(data[datanr-1]) ) error("digit expected");
994+
if ( !isdigit(data.peek(-1)) ) error("digit expected");
1023995

1024-
size_t exppos = datanr;
996+
size_t exppos = data.pos();
1025997
bool has_exp = false;
1026998
// Match exponent:
1027-
if ( opt==1 || (opt==0 && datanr<data.size() && toupper(data[datanr])=='E') ) {
1028-
if ( datanr>=data.size() || toupper(data[datanr])!='E' ) {
1029-
error("exponent 'E' expected");
1030-
}
999+
if ( opt==1 || (opt==0 && toupper(data.peek())=='E') ) {
1000+
if ( toupper(data.readchar())!='E' ) error("exponent 'E' expected");
10311001
has_exp = true;
1032-
datanr++;
1033-
charnr++;
1034-
if ( datanr<data.size() && (data[datanr]=='-' || data[datanr]=='+') ) {
1035-
datanr++;
1036-
charnr++;
1037-
}
1038-
while ( datanr<data.size() && isdigit(data[datanr]) ) {
1039-
datanr++;
1040-
charnr++;
1041-
}
1042-
if ( !isdigit(data[datanr-1]) ) error("digit expected");
1002+
if ( data.peek()=='-' || data.peek()=='+' ) data.readchar();
1003+
while ( isdigit(data.peek()) ) data.readchar();
1004+
if ( !isdigit(data.peek(-1)) ) error("digit expected");
10431005
}
10441006

10451007
if ( cmd.name()=="FLOATP" ) {
1046-
if ( has_exp && (data[digitpos]=='0' || dotpos!=digitpos+1) ) {
1008+
if ( has_exp && (first_digit=='0' || dotpos!=digitpos+1) ) {
10471009
error("exactly one non-zero before the decimal dot expected");
10481010
}
10491011
int ndecimals = (dotpos==string::npos ? 0 : exppos - dotpos - 1);
@@ -1054,7 +1016,7 @@ void checktoken(const command& cmd)
10541016
}
10551017
}
10561018

1057-
string matchstr = data.substr(start,datanr-start);
1019+
string matchstr = data.prev(data.pos()-start);
10581020

10591021
debug("parsing float '%s', exponent = %d",matchstr.c_str(),has_exp);
10601022

@@ -1070,10 +1032,8 @@ void checktoken(const command& cmd)
10701032
else if ( cmd.name()=="STRING" ) {
10711033
string str = eval(cmd.args[0]).getstr();
10721034
for (size_t i=0; i<str.size(); i++) {
1073-
if ( datanr>=data.size() ) error("premature end of file");
1074-
if ( data[datanr++]!=str[i] ) error();
1075-
charnr++;
1076-
if ( str[i]=='\n' ) linenr++, charnr=0;
1035+
if ( data.eof() ) error("premature end of file");
1036+
if ( data.readchar()!=str[i] ) error();
10771037
}
10781038

10791039
debug("'%s' = '%s'",str.c_str(),cmd.args[0].c_str());
@@ -1082,19 +1042,17 @@ void checktoken(const command& cmd)
10821042
else if ( cmd.name()=="REGEX" ) {
10831043
string str = eval(cmd.args[0]).getstr();
10841044
regex regexstr(str,regex::extended);
1085-
match_results<string::const_iterator> res;
1045+
smatch res;
10861046
string matchstr;
10871047

1088-
if ( !regex_search(data.cbegin()+datanr,data.cend(),
1089-
res,regexstr,regex_constants::match_continuous) ) {
1048+
string searchstr = data.next(data.size());
1049+
if ( !regex_search(searchstr,res,regexstr,regex_constants::match_continuous) ) {
10901050
error();
10911051
} else {
1092-
size_t matchend = size_t(res[0].second-data.begin());
1093-
matchstr = string(data.begin()+datanr,data.begin()+matchend);
1094-
for (; datanr<matchend; datanr++) {
1095-
charnr++;
1096-
if ( data[datanr]=='\n' ) linenr++, charnr=0;
1097-
}
1052+
size_t match_len = res[0].second - res[0].first;
1053+
size_t match_end = data.pos() + match_len;
1054+
matchstr = data.next(match_len);
1055+
while ( data.pos()<match_end ) data.readchar();
10981056
}
10991057
debug("'%s' = '%s'",matchstr.c_str(),str.c_str());
11001058

@@ -1136,7 +1094,7 @@ void checktestdata(ostream &datastream)
11361094
return;
11371095
} else {
11381096
debug("checking EOF");
1139-
if ( datanr++!=data.size() ) error();
1097+
if ( !data.eof() ) error();
11401098
throw eof_found_exception();
11411099
}
11421100
}
@@ -1282,10 +1240,8 @@ void init_checktestdata(std::istream &progstream, int opt_mask)
12821240
srandom(seed.get_ui());
12831241
gmp_rnd.seed(seed);
12841242

1285-
// Initialize current position in program and data.
1286-
linenr = charnr = 0;
1287-
datanr = prognr = 0;
1288-
extra_ws = 0;
1243+
// Initialize current position in program.
1244+
prognr = 0;
12891245
}
12901246

12911247
bool gentestdata(ostream &datastream)
@@ -1311,7 +1267,7 @@ bool checksyntax(istream &datastream)
13111267

13121268
// If we ignore whitespace, skip leading whitespace on first line
13131269
// as a special case; other lines are handled by checknewline().
1314-
if ( whitespace_ok ) readwhitespace();
1270+
if ( whitespace_ok ) data.readwhitespace();
13151271

13161272
try {
13171273
checktestdata(dummy);

0 commit comments

Comments
 (0)