Skip to content

Commit ffc5cfd

Browse files
chriseth authored and christianparpart committed
Remove scanner from compiler stack.
1 parent af18b8a commit ffc5cfd

23 files changed

+275
-241
lines changed

liblangutil/ParserBase.h

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,6 @@
2424
#pragma once
2525

2626
#include <liblangutil/Token.h>
27-
#include <liblangutil/Scanner.h>
2827
#include <memory>
2928
#include <string>
3029

@@ -50,8 +49,6 @@ class ParserBase
5049

5150
virtual ~ParserBase() = default;
5251

53-
std::shared_ptr<CharStream> source() const { return m_scanner->charStream(); }
54-
5552
protected:
5653
/// Utility class that creates an error and throws an exception if the
5754
/// recursion depth is too deep.

liblangutil/Scanner.cpp

Lines changed: 33 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -135,26 +135,11 @@ class LiteralScope
135135
bool m_complete;
136136
};
137137

138-
void Scanner::reset(CharStream _source)
139-
{
140-
m_source = make_shared<CharStream>(std::move(_source));
141-
m_sourceName = make_shared<string>(m_source->name());
142-
reset();
143-
}
144-
145-
void Scanner::reset(shared_ptr<CharStream> _source)
146-
{
147-
solAssert(_source.get() != nullptr, "You MUST provide a CharStream when resetting.");
148-
m_source = std::move(_source);
149-
m_sourceName = make_shared<string>(m_source->name());
150-
reset();
151-
}
152-
153138
void Scanner::reset()
154139
{
155-
m_source->reset();
140+
m_source.reset();
156141
m_kind = ScannerKind::Solidity;
157-
m_char = m_source->get();
142+
m_char = m_source.get();
158143
skipWhitespace();
159144
next();
160145
next();
@@ -163,7 +148,7 @@ void Scanner::reset()
163148

164149
void Scanner::setPosition(size_t _offset)
165150
{
166-
m_char = m_source->setPosition(_offset);
151+
m_char = m_source.setPosition(_offset);
167152
scanToken();
168153
next();
169154
next();
@@ -229,7 +214,7 @@ void Scanner::rescan()
229214
rollbackTo = static_cast<size_t>(m_tokens[Current].location.start);
230215
else
231216
rollbackTo = static_cast<size_t>(m_skippedComments[Current].location.start);
232-
m_char = m_source->rollback(m_source->position() - rollbackTo);
217+
m_char = m_source.rollback(m_source.position() - rollbackTo);
233218
next();
234219
next();
235220
next();
@@ -324,12 +309,12 @@ Token Scanner::skipSingleLineComment()
324309
{
325310
// Line terminator is not part of the comment. If it is a
326311
// non-ascii line terminator, it will result in a parser error.
327-
size_t startPosition = m_source->position();
312+
size_t startPosition = m_source.position();
328313
while (!isUnicodeLinebreak())
329314
if (!advance())
330315
break;
331316

332-
ScannerError unicodeDirectionError = validateBiDiMarkup(*m_source, startPosition);
317+
ScannerError unicodeDirectionError = validateBiDiMarkup(m_source, startPosition);
333318
if (unicodeDirectionError != ScannerError::NoError)
334319
return setError(unicodeDirectionError);
335320

@@ -362,28 +347,28 @@ bool Scanner::tryScanEndOfLine()
362347
size_t Scanner::scanSingleLineDocComment()
363348
{
364349
LiteralScope literal(this, LITERAL_TYPE_COMMENT);
365-
size_t endPosition = m_source->position();
350+
size_t endPosition = m_source.position();
366351

367352
skipWhitespaceExceptUnicodeLinebreak();
368353

369354
while (!isSourcePastEndOfInput())
370355
{
371-
endPosition = m_source->position();
356+
endPosition = m_source.position();
372357
if (tryScanEndOfLine())
373358
{
374359
// Check if next line is also a single-line comment.
375360
// If any whitespaces were skipped, use source position before.
376361
if (!skipWhitespaceExceptUnicodeLinebreak())
377-
endPosition = m_source->position();
362+
endPosition = m_source.position();
378363

379-
if (!m_source->isPastEndOfInput(3) &&
380-
m_source->get(0) == '/' &&
381-
m_source->get(1) == '/' &&
382-
m_source->get(2) == '/')
364+
if (!m_source.isPastEndOfInput(3) &&
365+
m_source.get(0) == '/' &&
366+
m_source.get(1) == '/' &&
367+
m_source.get(2) == '/')
383368
{
384-
if (!m_source->isPastEndOfInput(4) && m_source->get(3) == '/')
369+
if (!m_source.isPastEndOfInput(4) && m_source.get(3) == '/')
385370
break; // "////" is not a documentation comment
386-
m_char = m_source->advanceAndGet(3);
371+
m_char = m_source.advanceAndGet(3);
387372
if (atEndOfLine())
388373
continue;
389374
addCommentLiteralChar('\n');
@@ -404,7 +389,7 @@ size_t Scanner::scanSingleLineDocComment()
404389

405390
Token Scanner::skipMultiLineComment()
406391
{
407-
size_t startPosition = m_source->position();
392+
size_t startPosition = m_source.position();
408393
while (!isSourcePastEndOfInput())
409394
{
410395
char prevChar = m_char;
@@ -415,7 +400,7 @@ Token Scanner::skipMultiLineComment()
415400
// multi-line comments are treated as whitespace.
416401
if (prevChar == '*' && m_char == '/')
417402
{
418-
ScannerError unicodeDirectionError = validateBiDiMarkup(*m_source, startPosition);
403+
ScannerError unicodeDirectionError = validateBiDiMarkup(m_source, startPosition);
419404
if (unicodeDirectionError != ScannerError::NoError)
420405
return setError(unicodeDirectionError);
421406

@@ -442,32 +427,32 @@ Token Scanner::scanMultiLineDocComment()
442427
if (atEndOfLine())
443428
{
444429
skipWhitespace();
445-
if (!m_source->isPastEndOfInput(1) && m_source->get(0) == '*' && m_source->get(1) == '*')
430+
if (!m_source.isPastEndOfInput(1) && m_source.get(0) == '*' && m_source.get(1) == '*')
446431
{ // it is unknown if this leads to the end of the comment
447432
addCommentLiteralChar('*');
448433
advance();
449434
}
450-
else if (!m_source->isPastEndOfInput(1) && m_source->get(0) == '*' && m_source->get(1) != '/')
435+
else if (!m_source.isPastEndOfInput(1) && m_source.get(0) == '*' && m_source.get(1) != '/')
451436
{ // skip first '*' in subsequent lines
452-
m_char = m_source->advanceAndGet(1);
437+
m_char = m_source.advanceAndGet(1);
453438
if (atEndOfLine()) // ignores empty lines
454439
continue;
455440
if (charsAdded)
456441
addCommentLiteralChar('\n'); // corresponds to the end of previous line
457442
}
458-
else if (!m_source->isPastEndOfInput(1) && m_source->get(0) == '*' && m_source->get(1) == '/')
443+
else if (!m_source.isPastEndOfInput(1) && m_source.get(0) == '*' && m_source.get(1) == '/')
459444
{ // if after newline the comment ends, don't insert the newline
460-
m_char = m_source->advanceAndGet(2);
445+
m_char = m_source.advanceAndGet(2);
461446
endFound = true;
462447
break;
463448
}
464449
else if (charsAdded)
465450
addCommentLiteralChar('\n');
466451
}
467452

468-
if (!m_source->isPastEndOfInput(1) && m_source->get(0) == '*' && m_source->get(1) == '/')
453+
if (!m_source.isPastEndOfInput(1) && m_source.get(0) == '*' && m_source.get(1) == '/')
469454
{
470-
m_char = m_source->advanceAndGet(2);
455+
m_char = m_source.advanceAndGet(2);
471456
endFound = true;
472457
break;
473458
}
@@ -822,11 +807,11 @@ bool Scanner::isUnicodeLinebreak()
822807
if (0x0a <= m_char && m_char <= 0x0d)
823808
// line feed, vertical tab, form feed, carriage return
824809
return true;
825-
if (!m_source->isPastEndOfInput(1) && uint8_t(m_source->get(0)) == 0xc2 && uint8_t(m_source->get(1)) == 0x85)
810+
if (!m_source.isPastEndOfInput(1) && uint8_t(m_source.get(0)) == 0xc2 && uint8_t(m_source.get(1)) == 0x85)
826811
// NEL - U+0085, C2 85 in utf8
827812
return true;
828-
if (!m_source->isPastEndOfInput(2) && uint8_t(m_source->get(0)) == 0xe2 && uint8_t(m_source->get(1)) == 0x80 && (
829-
uint8_t(m_source->get(2)) == 0xa8 || uint8_t(m_source->get(2)) == 0xa9
813+
if (!m_source.isPastEndOfInput(2) && uint8_t(m_source.get(0)) == 0xe2 && uint8_t(m_source.get(1)) == 0x80 && (
814+
uint8_t(m_source.get(2)) == 0xa8 || uint8_t(m_source.get(2)) == 0xa9
830815
))
831816
// LS - U+2028, E2 80 A8 in utf8
832817
// PS - U+2029, E2 80 A9 in utf8
@@ -836,7 +821,7 @@ bool Scanner::isUnicodeLinebreak()
836821

837822
Token Scanner::scanString(bool const _isUnicode)
838823
{
839-
size_t startPosition = m_source->position();
824+
size_t startPosition = m_source.position();
840825
char const quote = m_char;
841826
advance(); // consume quote
842827
LiteralScope literal(this, LITERAL_TYPE_STRING);
@@ -867,7 +852,7 @@ Token Scanner::scanString(bool const _isUnicode)
867852

868853
if (_isUnicode)
869854
{
870-
ScannerError unicodeDirectionError = validateBiDiMarkup(*m_source, startPosition);
855+
ScannerError unicodeDirectionError = validateBiDiMarkup(m_source, startPosition);
871856
if (unicodeDirectionError != ScannerError::NoError)
872857
return setError(unicodeDirectionError);
873858
}
@@ -921,7 +906,7 @@ void Scanner::scanDecimalDigits()
921906
// May continue with decimal digit or underscore for grouping.
922907
do
923908
addLiteralCharAndAdvance();
924-
while (!m_source->isPastEndOfInput() && (isDecimalDigit(m_char) || m_char == '_'));
909+
while (!m_source.isPastEndOfInput() && (isDecimalDigit(m_char) || m_char == '_'));
925910

926911
// Defer further validation of underscore to SyntaxChecker.
927912
}
@@ -967,15 +952,15 @@ Token Scanner::scanNumber(char _charSeen)
967952
scanDecimalDigits(); // optional
968953
if (m_char == '.')
969954
{
970-
if (!m_source->isPastEndOfInput(1) && m_source->get(1) == '_')
955+
if (!m_source.isPastEndOfInput(1) && m_source.get(1) == '_')
971956
{
972957
// Assume the input may be a floating point number with leading '_' in fraction part.
973958
// Recover by consuming it all but returning `Illegal` right away.
974959
addLiteralCharAndAdvance(); // '.'
975960
addLiteralCharAndAdvance(); // '_'
976961
scanDecimalDigits();
977962
}
978-
if (m_source->isPastEndOfInput() || !isDecimalDigit(m_source->get(1)))
963+
if (m_source.isPastEndOfInput() || !isDecimalDigit(m_source.get(1)))
979964
{
980965
// A '.' has to be followed by a number.
981966
literal.complete();
@@ -992,7 +977,7 @@ Token Scanner::scanNumber(char _charSeen)
992977
solAssert(kind != HEX, "'e'/'E' must be scanned as part of the hex number");
993978
if (kind != DECIMAL)
994979
return setError(ScannerError::IllegalExponent);
995-
else if (!m_source->isPastEndOfInput(1) && m_source->get(1) == '_')
980+
else if (!m_source.isPastEndOfInput(1) && m_source.get(1) == '_')
996981
{
997982
// Recover from wrongly placed underscore as delimiter in literal with scientific
998983
// notation by consuming until the end.

liblangutil/Scanner.h

Lines changed: 11 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -100,17 +100,13 @@ class Scanner
100100
{
101101
friend class LiteralScope;
102102
public:
103-
explicit Scanner(std::shared_ptr<CharStream> _source) { reset(std::move(_source)); }
104-
explicit Scanner(CharStream _source = CharStream()) { reset(std::move(_source)); }
105-
106-
std::string const& source() const noexcept { return m_source->source(); }
107-
108-
std::shared_ptr<CharStream> charStream() noexcept { return m_source; }
109-
std::shared_ptr<CharStream const> charStream() const noexcept { return m_source; }
103+
/// Creates a scanner that reads from _source and immediately calls reset().
/// Only a reference to _source is stored (m_source), so the caller has to keep
/// the CharStream alive for as long as the scanner is used.
/// The source name is copied once from _source.name() into m_sourceName.
explicit Scanner(CharStream& _source):
104+
m_source(_source),
105+
m_sourceName{std::make_shared<std::string>(_source.name())}
106+
{
107+
reset();
108+
}
110109

111-
/// Resets the scanner as if newly constructed with _source as input.
112-
void reset(CharStream _source);
113-
void reset(std::shared_ptr<CharStream> _source);
114110
/// Resets scanner to the start of input.
115111
void reset();
116112

@@ -201,8 +197,8 @@ class Scanner
201197
void addUnicodeAsUTF8(unsigned codepoint);
202198
///@}
203199

204-
bool advance() { m_char = m_source->advanceAndGet(); return !m_source->isPastEndOfInput(); }
205-
void rollback(size_t _amount) { m_char = m_source->rollback(_amount); }
200+
bool advance() { m_char = m_source.advanceAndGet(); return !m_source.isPastEndOfInput(); }
201+
void rollback(size_t _amount) { m_char = m_source.rollback(_amount); }
206202
/// Rolls back to the start of the current token and re-runs the scanner.
207203
void rescan();
208204

@@ -251,15 +247,15 @@ class Scanner
251247
bool isUnicodeLinebreak();
252248

253249
/// Return the current source position.
254-
size_t sourcePos() const { return m_source->position(); }
255-
bool isSourcePastEndOfInput() const { return m_source->isPastEndOfInput(); }
250+
size_t sourcePos() const { return m_source.position(); }
251+
bool isSourcePastEndOfInput() const { return m_source.isPastEndOfInput(); }
256252

257253
enum TokenIndex { Current, Next, NextNext };
258254

259255
TokenDesc m_skippedComments[3] = {}; // desc for the current, next and nextnext skipped comment
260256
TokenDesc m_tokens[3] = {}; // desc for the current, next and nextnext token
261257

262-
std::shared_ptr<CharStream> m_source;
258+
CharStream& m_source;
263259
std::shared_ptr<std::string const> m_sourceName;
264260

265261
ScannerKind m_kind = ScannerKind::Solidity;

libsolidity/ast/ASTJsonImporter.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -958,7 +958,8 @@ Json::Value ASTJsonImporter::member(Json::Value const& _node, string const& _nam
958958

959959
/// Scans the string value of _node and returns the scanner's current token.
/// Asserts (astAssert) that the token after the current one is Token::EOS,
/// i.e. that the string does not contain more than a single token.
/// The added lines build the CharStream as a named local (with an empty second
/// argument, presumably the source name) and pass it to the scanner, instead of
/// handing the scanner a temporary as the removed line did.
Token ASTJsonImporter::scanSingleToken(Json::Value const& _node)
960960
{
961-
langutil::Scanner scanner{langutil::CharStream(_node.asString(), "")};
961+
langutil::CharStream charStream(_node.asString(), "");
962+
langutil::Scanner scanner{charStream};
962963
astAssert(scanner.peekNextToken() == Token::EOS, "Token string is too long.");
963964
return scanner.currentToken();
964965
}

libsolidity/codegen/CompilerContext.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -434,14 +434,14 @@ void CompilerContext::appendInlineAssembly(
434434

435435
ErrorList errors;
436436
ErrorReporter errorReporter(errors);
437-
auto scanner = make_shared<langutil::Scanner>(langutil::CharStream(_assembly, _sourceName));
437+
langutil::CharStream charStream(_assembly, _sourceName);
438438
yul::EVMDialect const& dialect = yul::EVMDialect::strictAssemblyForEVM(m_evmVersion);
439439
optional<langutil::SourceLocation> locationOverride;
440440
if (!_system)
441441
locationOverride = m_asm->currentSourceLocation();
442442
shared_ptr<yul::Block> parserResult =
443443
yul::Parser(errorReporter, dialect, std::move(locationOverride))
444-
.parse(scanner, false);
444+
.parse(make_shared<langutil::Scanner>(charStream), false);
445445
#ifdef SOL_OUTPUT_ASM
446446
cout << yul::AsmPrinter(&dialect)(*parserResult) << endl;
447447
#endif
@@ -457,7 +457,7 @@ void CompilerContext::appendInlineAssembly(
457457
for (auto const& error: errorReporter.errors())
458458
// TODO if we have "locationOverride", it will be the wrong char stream,
459459
// but we do not have access to the solidity scanner.
460-
message += SourceReferenceFormatter::formatErrorInformation(*error, *scanner->charStream());
460+
message += SourceReferenceFormatter::formatErrorInformation(*error, charStream);
461461
message += "-------------------------------------------\n";
462462

463463
solAssert(false, message);
@@ -491,8 +491,8 @@ void CompilerContext::appendInlineAssembly(
491491
solAssert(m_generatedYulUtilityCode.empty(), "");
492492
m_generatedYulUtilityCode = yul::AsmPrinter(dialect)(*obj.code);
493493
string code = yul::AsmPrinter{dialect}(*obj.code);
494-
scanner = make_shared<langutil::Scanner>(langutil::CharStream(m_generatedYulUtilityCode, _sourceName));
495-
obj.code = yul::Parser(errorReporter, dialect).parse(scanner, false);
494+
langutil::CharStream charStream(m_generatedYulUtilityCode, _sourceName);
495+
obj.code = yul::Parser(errorReporter, dialect).parse(make_shared<Scanner>(charStream), false);
496496
*obj.analysisInfo = yul::AsmAnalyzer::analyzeStrictAssertCorrect(dialect, obj);
497497
}
498498

0 commit comments

Comments
 (0)