diff --git a/pkgs/string_scanner/benchmark/benchmark.dart b/pkgs/string_scanner/benchmark/benchmark.dart
new file mode 100644
index 000000000..a4564c34d
--- /dev/null
+++ b/pkgs/string_scanner/benchmark/benchmark.dart
@@ -0,0 +1,104 @@
+// Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import 'package:benchmark_harness/benchmark_harness.dart';
+import 'package:string_scanner/string_scanner.dart';
+
+final _string = 'This is a test string with some typical content.\n' * 50000;
+final _word = RegExp(r'\w+');
+final _space = RegExp(r'\s+');
+
+class StringScannerReadCharBenchmark extends BenchmarkBase {
+  StringScannerReadCharBenchmark() : super('StringScanner readChar');
+
+  @override
+  void run() {
+    final scanner = StringScanner(_string);
+    while (!scanner.isDone) {
+      scanner.readChar();
+    }
+  }
+}
+
+class LineScannerReadCharBenchmark extends BenchmarkBase {
+  LineScannerReadCharBenchmark() : super('LineScanner readChar');
+
+  @override
+  void run() {
+    final scanner = LineScanner(_string);
+    while (!scanner.isDone) {
+      scanner.readChar();
+    }
+  }
+}
+
+class SpanScannerReadCharBenchmark extends BenchmarkBase {
+  SpanScannerReadCharBenchmark() : super('SpanScanner readChar');
+
+  @override
+  void run() {
+    final scanner = SpanScanner(_string);
+    while (!scanner.isDone) {
+      scanner.readChar();
+    }
+  }
+}
+
+class StringScannerScanBenchmark extends BenchmarkBase {
+  StringScannerScanBenchmark() : super('StringScanner scan');
+
+  @override
+  void run() {
+    final scanner = StringScanner(_string);
+    while (!scanner.isDone) {
+      if (!scanner.scan(_word) &&
+          !scanner.scanChar(10) &&
+          !scanner.scan(_space)) {
+        scanner.readChar();
+      }
+    }
+  }
+}
+
+class LineScannerScanBenchmark extends BenchmarkBase {
+  LineScannerScanBenchmark() : super('LineScanner scan');
+
+  @override
+  void run() {
+    final scanner = LineScanner(_string);
+    while (!scanner.isDone) {
+      if (!scanner.scan(_word) &&
+          !scanner.scanChar(10) &&
+          !scanner.scan(_space)) {
+        scanner.readChar();
+      }
+    }
+  }
+}
+
+class SpanScannerScanBenchmark extends BenchmarkBase {
+  SpanScannerScanBenchmark() : super('SpanScanner scan');
+
+  @override
+  void run() {
+    final scanner = SpanScanner(_string);
+    while (!scanner.isDone) {
+      if (!scanner.scan(_word) &&
+          !scanner.scanChar(10) &&
+          !scanner.scan(_space)) {
+        scanner.readChar();
+      }
+    }
+  }
+}
+
+void main() {
+  print('String length: ${_string.length}');
+  StringScannerReadCharBenchmark().report();
+  LineScannerReadCharBenchmark().report();
+  SpanScannerReadCharBenchmark().report();
+  StringScannerScanBenchmark().report();
+  LineScannerScanBenchmark().report();
+  SpanScannerScanBenchmark().report();
+}
diff --git a/pkgs/string_scanner/lib/src/line_scanner.dart b/pkgs/string_scanner/lib/src/line_scanner.dart
index b18d61057..111ac78f0 100644
--- a/pkgs/string_scanner/lib/src/line_scanner.dart
+++ b/pkgs/string_scanner/lib/src/line_scanner.dart
@@ -8,10 +8,6 @@ import 'utils.dart';
 
 // Note that much of this code is duplicated in eager_span_scanner.dart.
 
-/// A regular expression matching newlines. A newline is either a `\n`, a `\r\n`
-/// or a `\r` that is not immediately followed by a `\n`.
-final _newlineRegExp = RegExp(r'\n|\r\n|\r(?!\n)');
-
 /// A subclass of [StringScanner] that tracks line and column information.
 class LineScanner extends StringScanner {
   /// The scanner's current (zero-based) line number.
@@ -32,10 +28,6 @@ class LineScanner extends StringScanner {
   LineScannerState get state =>
       LineScannerState._(this, position, line, column);
 
-  /// Whether the current position is between a CR character and an LF
-  /// charactet.
-  bool get _betweenCRLF => peekChar(-1) == $cr && peekChar() == $lf;
-
   set state(LineScannerState state) {
     if (!identical(state._scanner, this)) {
       throw ArgumentError('The given LineScannerState was not returned by '
@@ -60,45 +52,42 @@ class LineScanner extends StringScanner {
       _line = 0;
       _column = 0;
     } else if (newPosition > oldPosition) {
-      final newlines = _newlinesIn(string.substring(oldPosition, newPosition),
-          endPosition: newPosition);
-      _line += newlines.length;
-      if (newlines.isEmpty) {
+      // Count the newlines that were skipped over and remember where the last
+      // one ended, since the column restarts from there.
+      var newlines = 0;
+      var lastNewlineEnd = -1;
+      for (var i = oldPosition; i < newPosition; i++) {
+        if (_isNewlineAt(i)) {
+          newlines++;
+          lastNewlineEnd = i + 1;
+        }
+      }
+      _line += newlines;
+      if (newlines == 0) {
         _column += newPosition - oldPosition;
       } else {
-        // The regex got a substring, so we need to account for where it started
-        // in the string.
-        final offsetOfLastNewline = oldPosition + newlines.last.end;
-        _column = newPosition - offsetOfLastNewline;
+        _column = newPosition - lastNewlineEnd;
+      }
+    } else {
+      // Moving backwards: count the newlines between the two positions.
+      var newlines = 0;
+      for (var i = newPosition; i < oldPosition; i++) {
+        if (_isNewlineAt(i)) newlines++;
       }
-    } else if (newPosition < oldPosition) {
-      final newlines = _newlinesIn(string.substring(newPosition, oldPosition),
-          endPosition: oldPosition);
+      _line -= newlines;
 
-      _line -= newlines.length;
-      if (newlines.isEmpty) {
+      if (newlines == 0) {
         _column -= oldPosition - newPosition;
       } else {
-        // To compute the new column, we need to locate the last newline before
-        // the new position. When searching, we must exclude the CR if we're
-        // between a CRLF because it's not considered a newline.
-        final crOffset = _betweenCRLF ? -1 : 0;
-        // Additionally, if we use newPosition as the end of the search and the
-        // character at that position itself (the next character) is a newline
-        // we should not use it, so also offset to account for that.
-        const currentCharOffset = -1;
-        final lastNewline = string.lastIndexOf(
-            _newlineRegExp, newPosition + currentCharOffset + crOffset);
-
-        // Now we need to know the offset after the newline. This is the index
-        // above plus the length of the newline (eg. if we found `\r\n`) we need
-        // to add two. However if no newline was found, that index is 0.
-        final offsetAfterLastNewline = lastNewline == -1
-            ? 0
-            : string[lastNewline] == '\r' && string[lastNewline + 1] == '\n'
-                ? lastNewline + 2
-                : lastNewline + 1;
-
+        // The column is measured from the end of the last newline before the
+        // new position, or from the start of the string if there is none.
+        var offsetAfterLastNewline = 0;
+        for (var i = newPosition - 1; i >= 0; i--) {
+          if (_isNewlineAt(i)) {
+            offsetAfterLastNewline = i + 1;
+            break;
+          }
+        }
         _column = newPosition - offsetAfterLastNewline;
       }
     }
@@ -122,9 +111,18 @@ class LineScanner extends StringScanner {
 
   /// Adjusts [_line] and [_column] after having consumed [character].
   void _adjustLineAndColumn(int character) {
-    if (character == $lf || (character == $cr && peekChar() != $lf)) {
+    if (character == $lf) {
       _line += 1;
       _column = 0;
+    } else if (character == $cr) {
+      if (position < string.length && string.codeUnitAt(position) == $lf) {
+        // The CR of a CR LF pair only advances the column; the LF that
+        // follows is what ends the line.
+        _column += 1;
+      } else {
+        _line += 1;
+        _column = 0;
+      }
     } else {
       _column += inSupplementaryPlane(character) ? 2 : 1;
     }
@@ -134,35 +132,40 @@ class LineScanner extends StringScanner {
   bool scan(Pattern pattern) {
     if (!super.scan(pattern)) return false;
 
-    final newlines = _newlinesIn(lastMatch![0]!, endPosition: position);
-    _line += newlines.length;
-    if (newlines.isEmpty) {
-      _column += lastMatch![0]!.length;
+    // The match ends at the scanner's new position, so walk the matched range
+    // of [string] directly rather than copying it out of the match.
+    final start = lastMatch!.start;
+    var newlines = 0;
+    var lastNewlineEnd = -1;
+    for (var i = start; i < position; i++) {
+      if (_isNewlineAt(i)) {
+        newlines++;
+        lastNewlineEnd = i + 1;
+      }
+    }
+
+    _line += newlines;
+    if (newlines == 0) {
+      _column += position - start;
     } else {
-      _column = lastMatch![0]!.length - newlines.last.end;
+      _column = position - lastNewlineEnd;
     }
 
     return true;
   }
 
-  /// Returns a list of [Match]es describing all the newlines in [text], which
-  /// ends at [endPosition].
-  ///
-  /// If [text] ends with `\r`, it will only be treated as a newline if the next
-  /// character at [position] is not a `\n`.
-  List _newlinesIn(String text, {required int endPosition}) {
-    final newlines = _newlineRegExp.allMatches(text).toList();
-    // If the last character is a `\r` it will have been treated as a newline,
-    // but this is only valid if the next character is not a `\n`.
-    if (endPosition < string.length &&
-        text.endsWith('\r') &&
-        string[endPosition] == '\n') {
-      // newlines should never be empty here, because if `text` ends with `\r`
-      // it would have matched `\r(?!\n)` in the newline regex.
-      newlines.removeLast();
-    }
-    return newlines;
-  }
+  /// Whether the code unit at [index] in [string] ends a line.
+  ///
+  /// A `\n` always ends a line. A `\r` only ends a line when it is not
+  /// immediately followed by a `\n`, so a `\r\n` pair counts as one newline
+  /// that ends after the `\n`.
+  bool _isNewlineAt(int index) {
+    final char = string.codeUnitAt(index);
+    if (char == $lf) return true;
+    if (char != $cr) return false;
+    final next = index + 1;
+    return next >= string.length || string.codeUnitAt(next) != $lf;
+  }
 }
 
 /// A class representing the state of a [LineScanner].
diff --git a/pkgs/string_scanner/pubspec.yaml b/pkgs/string_scanner/pubspec.yaml
index c03162ea2..b45b1d7a3 100644
--- a/pkgs/string_scanner/pubspec.yaml
+++ b/pkgs/string_scanner/pubspec.yaml
@@ -11,5 +11,6 @@ dependencies:
   source_span: ^1.8.0
 
 dev_dependencies:
+  benchmark_harness: ^2.2.2
   dart_flutter_team_lints: ^3.0.0
   test: ^1.16.6
diff --git a/pkgs/string_scanner/test/line_scanner_test.dart b/pkgs/string_scanner/test/line_scanner_test.dart
index 1af5c3666..794a2ce4b 100644
--- a/pkgs/string_scanner/test/line_scanner_test.dart
+++ b/pkgs/string_scanner/test/line_scanner_test.dart
@@ -430,6 +430,13 @@ void main() {
       expect(scanner.column, equals(1));
     });
 
+    test('backward from between CR LF to before CR LF', () {
+      scanner.expect('foo\nbar\r');
+      scanner.position = 1; // "f"
+      expect(scanner.line, equals(0));
+      expect(scanner.column, equals(1));
+    });
+
     test('backward to after CR LF', () {
       scanner.expect('foo\nbar\r\nbaz');
       scanner.position = 9; // "foo\nbar\r\n"