Move data into databuffer object.

eldering · eldering · commit 62119e568089 · 2018-10-21T13:41:19.000+02:00
This simplifies the code a bit and abstracts away low-level data access.
diff --git a/Makefile b/Makefile
@@ -55,7 +55,7 @@ config.mk: config.mk.in
 
 libchecktestdata.o: config.mk
 libchecktestdata.o: $(PARSER_GEN)
-libchecktestdata.o: %.o: %.cc %.hpp parser.h
+libchecktestdata.o: %.o: %.cc %.hpp databuffer.hpp parser.h
 
 checktestdata: CPPFLAGS += $(BOOST_CPPFLAGS)
 checktestdata: LDFLAGS  += $(BOOST_LDFLAGS) $(STATIC_LINK_START) $(LIBGMPXX) $(STATIC_LINK_END)
@@ -106,7 +106,7 @@ coverage:
 	$(MAKE) clean
 	$(MAKE) CXXFLAGS='$(COVERAGE_CXXFLAGS)'
 	$(MAKE) check
-	gcov checktestdata.cc libchecktestdata.cc libchecktestdata.hpp
+	gcov checktestdata.cc libchecktestdata.cc libchecktestdata.hpp databuffer.hpp
 
 coverage-clean:
 	rm -f *.gcda *.gcno *.gcov coverage*.html
diff --git a/databuffer.hpp b/databuffer.hpp
@@ -0,0 +1,70 @@
+/*
+   Libchecktestdata -- check testdata according to specification.
+
+   It's licensed under the 2-clause BSD license, see the file COPYING.
+
+   Input data buffer, wraps a string.
+*/
+
+#include <string>
+#include <cctype>
+
+namespace checktestdata {
+
+int isspace_notnewline(char c) { return isspace(c) && c!='\n'; }
+
+class databuffer {
+private:
+	std::string data;
+	size_t _pos, _line, _lpos;
+
+public:
+	databuffer() {}
+	databuffer(std::string _data): data(_data), _pos(0), _line(0), _lpos(0) {}
+
+	bool eof() const { return _pos >= data.size(); }
+
+	size_t size() const { return data.size(); }
+
+	size_t pos()  const { return _pos; }
+	size_t line() const { return _line; }
+	size_t lpos() const { return _lpos; }
+
+	std::string next(size_t length=1) const
+	{
+		size_t end = std::min(size(),_pos+length);
+		return data.substr(_pos,end-_pos);
+	}
+	std::string prev(size_t length=1) const
+	{
+		size_t start = std::max(0LL,(long long)_pos-(long long)length);
+		return data.substr(start,_pos-start);
+	}
+
+	char peek(size_t ahead=0) const
+	{
+		if ( _pos+ahead>=size() ) return char();
+		return data[_pos+ahead];
+	}
+
+	char readchar() {
+		char c = data[_pos++];
+		if ( c=='\n' ) {
+			_line++;
+			_lpos = 0;
+		} else {
+			_lpos++;
+		}
+		return c;
+	}
+
+	void readwhitespace()
+	{
+		while ( !eof() && isspace_notnewline(data[_pos]) ) {
+			_pos++;
+			_lpos++;
+		}
+	}
+};
+
+} // namespace checktestdata
diff --git a/libchecktestdata.cc b/libchecktestdata.cc
@@ -30,6 +30,7 @@
 
 #include "parser.h"
 #include "libchecktestdata.hpp"
+#include "databuffer.hpp"
 
 using namespace std;
 
@@ -42,12 +43,12 @@ class generate_exception {};
 const int display_before_error = 65;
 const int display_after_error  = 50;
 
-size_t prognr, datanr, linenr, charnr, extra_ws;
+size_t prognr;
 command currcmd;
 
 gmp_randclass gmp_rnd(gmp_randinit_default);
 
-string data;
+databuffer data;
 vector<command> program;
 
 // This stores array-type variables like x[i,j] as string "x" and
@@ -149,7 +150,7 @@ void readtestdata(istream &in)
 		exit(exit_failure);
 	}
 
-	data = ss.str();
+	data = databuffer(ss.str());
 }
 
 void error(string msg = string())
@@ -159,17 +160,14 @@ void error(string msg = string())
 		throw generate_exception();
 	}
 
-	size_t fr = max(0,int(datanr)-display_before_error);
-	size_t to = min(data.size(),datanr+display_after_error);
-
-	debug("error at datanr = %d, %d - %d\n",(int)datanr,(int)fr,(int)to);
+	debug("error at datanr = %d\n",(int)data.pos());
 
 	if ( !quiet ) {
-		cerr << data.substr(fr,datanr-fr) << endl;
-		cerr << string(min(charnr,(size_t)display_before_error),' ') << '^';
-		cerr << data.substr(datanr,to-datanr) << endl << endl;
+		cerr << data.prev(display_before_error) << endl;
+		cerr << string(min(data.lpos(),(size_t)display_before_error),' ') << '^';
+		cerr << data.next(display_after_error) << endl << endl;
 
-		cerr << "ERROR: line " << linenr+1 << " character " << charnr+1;
+		cerr << "ERROR: line " << data.line()+1 << " character " << data.lpos()+1;
 		cerr << " of testdata doesn't match " << currcmd;
 		if ( msg.length()>0 ) cerr << ": " << msg;
 		cerr << endl << endl;
@@ -589,12 +587,12 @@ bool dotest(const test& t)
 	case 'E': if ( gendata ) {
 			  return (random() % 10 < 3);
 		  } else {
-			  return datanr>=data.size();
+			  return data.eof();
 		  }
 	case 'M': if ( gendata ) {
 			  return (random() % 2 == 0);
 		  } else {
-			  return datanr<data.size() && t.args[0].val.find(data[datanr])!=string::npos;
+			  return !data.eof() && t.args[0].val.find(data.next())!=string::npos;
 		  }
 	case 'U': return unique(t.args);
 	case 'A': return inarray(t.args[0],t.args[1]);
@@ -605,46 +603,30 @@ bool dotest(const test& t)
 	}
 }
 
-int isspace_notnewline(char c) { return isspace(c) && c!='\n'; }
-
-void readwhitespace()
-{
-	while ( datanr<data.size() && isspace_notnewline(data[datanr]) ) {
-		datanr++;
-		charnr++;
-		extra_ws++;
-	}
-}
-
 void checkspace()
 {
-	if ( datanr>=data.size() ) error("end of file");
+	if ( data.eof() ) error("end of file");
 
 	if ( whitespace_ok ) {
 		// First check at least one space-like character
-		if ( !isspace_notnewline(data[datanr++]) ) error();
-		charnr++;
+		if ( !isspace_notnewline(data.readchar()) ) error();
 		// Then greedily read non-newline whitespace
-		readwhitespace();
+		data.readwhitespace();
 	} else {
-		if ( data[datanr++]!=' ' ) error();
-		charnr++;
+		if ( data.readchar()!=' ' ) error();
 	}
 }
 
 void checknewline()
 {
 	// Trailing whitespace before newline
-	if ( whitespace_ok ) readwhitespace();
+	if ( whitespace_ok ) data.readwhitespace();
 
-	if ( datanr>=data.size() ) error("end of file");
-	if ( data[datanr++]!='\n' ) error();
-	linenr++;
-	charnr=0;
+	if ( data.eof() ) error("end of file");
+	if ( data.readchar()!='\n' ) error();
 
 	// Leading whitespace after newline
-	if ( whitespace_ok ) readwhitespace();
-
+	if ( whitespace_ok ) data.readwhitespace();
 }
 
 #define MAX_MULT 10
@@ -834,8 +816,7 @@ void getdecrange(const command& cmd, int *decrange)
 void gentoken(command cmd, ostream &datastream)
 {
 	currcmd = cmd;
-	debug("generating token %s at %lu,%lu",
-	      cmd.name().c_str(),(unsigned long)linenr,(unsigned long)charnr);
+	debug("generating token %s", cmd.name().c_str());
 
 	if ( cmd.name()=="SPACE" ) datastream << ' ';
 
@@ -942,7 +923,7 @@ void checktoken(const command& cmd)
 {
 	currcmd = cmd;
 	debug("checking token %s at %lu,%lu",
-	      cmd.name().c_str(),(unsigned long)linenr,(unsigned long)charnr);
+	      cmd.name().c_str(),data.line(),data.lpos());
 
 	if ( cmd.name()=="SPACE" ) checkspace();
 
@@ -952,12 +933,8 @@ void checktoken(const command& cmd)
 		// Accepts format (0|-?[1-9][0-9]*), i.e. no leading zero's
 		// and no '-0' accepted.
 		string num;
-		size_t len = 0;
-		while ( datanr<data.size() &&
-		        (isdigit(data[datanr+len]) ||
-		         (num.size()==0 && data[datanr+len]=='-')) ) {
-			num += data[datanr+len];
-			len++;
+		while ( isdigit(data.peek()) || (num.empty() && data.peek()=='-') ) {
+			num += data.readchar();
 		}
 
 		mpz_class lo = eval(cmd.args[0]);
@@ -977,9 +954,6 @@ void checktoken(const command& cmd)
 
 		if ( x<lo || x>hi ) error("value out of range");
 		if ( cmd.nargs()>=3 ) setvar(cmd.args[2],value_t(x));
-
-		datanr += len;
-		charnr += len;
 	}
 
 	else if ( cmd.name()=="FLOAT" || cmd.name()=="FLOATP" ) {
@@ -1005,45 +979,33 @@ void checktoken(const command& cmd)
 			}
 		}
 
-		size_t start = datanr;
+		size_t start = data.pos();
 		// Match optional minus sign:
-		if ( datanr<data.size() && data[datanr]=='-' ) { datanr++; charnr++; }
+		if ( data.peek()=='-' ) data.readchar();
 		// Match base with optional decimal dot:
-		if ( datanr>=data.size() || !isdigit(data[datanr]) ) error("digit expected");
-		size_t digitpos = datanr, dotpos = string::npos;
-		while ( datanr<data.size() &&
-		        (isdigit(data[datanr]) ||
-		         (dotpos==string::npos && digitpos!=datanr && data[datanr]=='.')) ) {
-			if ( data[datanr]=='.' ) dotpos = datanr;
-			datanr++;
-			charnr++;
+		if ( !isdigit(data.peek()) ) error("digit expected");
+		size_t digitpos = data.pos(), dotpos = string::npos;
+		char first_digit = data.peek();
+		while ( (isdigit(data.peek()) ||
+		         (dotpos==string::npos && digitpos!=data.pos() && data.peek()=='.')) ) {
+			if ( data.readchar()=='.' ) dotpos = data.pos()-1;
 		}
 		// Check that any dot is followed by digit:
-		if ( !isdigit(data[datanr-1]) ) error("digit expected");
+		if ( !isdigit(data.peek(-1)) ) error("digit expected");
 
-		size_t exppos = datanr;
+		size_t exppos = data.pos();
 		bool has_exp = false;
 		// Match exponent:
-		if ( opt==1 || (opt==0 && datanr<data.size() && toupper(data[datanr])=='E') ) {
-			if ( datanr>=data.size() || toupper(data[datanr])!='E' ) {
-				error("exponent 'E' expected");
-			}
+		if ( opt==1 || (opt==0 && toupper(data.peek())=='E') ) {
+			if ( toupper(data.readchar())!='E' ) error("exponent 'E' expected");
 			has_exp = true;
-			datanr++;
-			charnr++;
-			if ( datanr<data.size() && (data[datanr]=='-' || data[datanr]=='+') ) {
-				datanr++;
-				charnr++;
-			}
-			while ( datanr<data.size() && isdigit(data[datanr]) ) {
-				datanr++;
-				charnr++;
-			}
-			if ( !isdigit(data[datanr-1]) ) error("digit expected");
+			if ( data.peek()=='-' || data.peek()=='+' ) data.readchar();
+			while ( isdigit(data.peek()) ) data.readchar();
+			if ( !isdigit(data.peek(-1)) ) error("digit expected");
 		}
 
 		if ( cmd.name()=="FLOATP" ) {
-			if ( has_exp && (data[digitpos]=='0' || dotpos!=digitpos+1) ) {
+			if ( has_exp && (first_digit=='0' || dotpos!=digitpos+1) ) {
 				error("exactly one non-zero before the decimal dot expected");
 			}
 			int ndecimals = (dotpos==string::npos ? 0 : exppos - dotpos - 1);
@@ -1054,7 +1016,7 @@ void checktoken(const command& cmd)
 			}
 		}
 
-		string matchstr = data.substr(start,datanr-start);
+		string matchstr = data.prev(data.pos()-start);
 
 		debug("parsing float '%s', exponent = %d",matchstr.c_str(),has_exp);
 
@@ -1070,10 +1032,8 @@ void checktoken(const command& cmd)
 	else if ( cmd.name()=="STRING" ) {
 		string str = eval(cmd.args[0]).getstr();
 		for (size_t i=0; i<str.size(); i++) {
-			if ( datanr>=data.size() ) error("premature end of file");
-			if ( data[datanr++]!=str[i] ) error();
-			charnr++;
-			if ( str[i]=='\n' ) linenr++, charnr=0;
+			if ( data.eof() ) error("premature end of file");
+			if ( data.readchar()!=str[i] ) error();
 		}
 
 		debug("'%s' = '%s'",str.c_str(),cmd.args[0].c_str());
@@ -1082,19 +1042,17 @@ void checktoken(const command& cmd)
 	else if ( cmd.name()=="REGEX" ) {
 		string str = eval(cmd.args[0]).getstr();
 		regex regexstr(str,regex::extended);
-		match_results<string::const_iterator> res;
+		smatch res;
 		string matchstr;
 
-		if ( !regex_search(data.cbegin()+datanr,data.cend(),
-		                   res,regexstr,regex_constants::match_continuous) ) {
+		string searchstr = data.next(data.size());
+		if ( !regex_search(searchstr,res,regexstr,regex_constants::match_continuous) ) {
 			error();
 		} else {
-			size_t matchend = size_t(res[0].second-data.begin());
-			matchstr = string(data.begin()+datanr,data.begin()+matchend);
-			for (; datanr<matchend; datanr++) {
-				charnr++;
-				if ( data[datanr]=='\n' ) linenr++, charnr=0;
-			}
+			size_t match_len = res[0].second - res[0].first;
+			size_t match_end = data.pos() + match_len;
+			matchstr = data.next(match_len);
+			while ( data.pos()<match_end ) data.readchar();
 		}
 		debug("'%s' = '%s'",matchstr.c_str(),str.c_str());
 
@@ -1136,7 +1094,7 @@ void checktestdata(ostream &datastream)
 				return;
 			} else {
 				debug("checking EOF");
-				if ( datanr++!=data.size() ) error();
+				if ( !data.eof() ) error();
 				throw eof_found_exception();
 			}
 		}
@@ -1282,10 +1240,8 @@ void init_checktestdata(std::istream &progstream, int opt_mask)
 	srandom(seed.get_ui());
 	gmp_rnd.seed(seed);
 
-	// Initialize current position in program and data.
-	linenr = charnr = 0;
-	datanr = prognr = 0;
-	extra_ws = 0;
+	// Initialize current position in program.
+	prognr = 0;
 }
 
 bool gentestdata(ostream &datastream)
@@ -1311,7 +1267,7 @@ bool checksyntax(istream &datastream)
 
 	// If we ignore whitespace, skip leading whitespace on first line
 	// as a special case; other lines are handled by checknewline().
-	if ( whitespace_ok ) readwhitespace();
+	if ( whitespace_ok ) data.readwhitespace();
 
 	try {
 		checktestdata(dummy);