Commit 918547d

bors[bot] and Veetaha authored
Merge #2911
2911: Implement collecting errors while tokenizing r=matklad a=Veetaha

Now we collect errors from `rustc_lexer` and return them in `ParsedToken { token, error }` and `ParsedTokens { tokens, errors }` structures **([UPD]: this is now simplified, see the update below)**.

The main changes are introduced in `ra_syntax/parsing/lexer.rs`. It now exposes the following functions and types:

```rust
pub fn tokenize(text: &str) -> ParsedTokens;
pub fn tokenize_append(text: &str, parsed_tokens_to_append_to: &mut ParsedTokens);
pub fn first_token(text: &str) -> Option<ParsedToken>; // allows any number of tokens in text
pub fn single_token(text: &str) -> Option<ParsedToken>; // allows only a single token in text

pub struct ParsedToken {
    pub token: Token,
    pub error: Option<SyntaxError>,
}

pub struct ParsedTokens {
    pub tokens: Vec<Token>,
    pub errors: Vec<SyntaxError>,
}

pub enum TokenizeError { /* Simple enum which reflects rustc_lexer tokenization errors */ }
```

In the first commit I implemented this with iterators, but then decided that, since this crate is ad hoc for `rust-analyzer` and we can clearly see all the places where it is used, it would be better to simplify the API down to plain vectors.

This is currently WIP, because I want to add tests for the error messages generated by the lexer. I'd like to hear your thoughts on how to define these tests in the `ra_syntax/test-data` dir.

Related issues: #223

**[UPD]** After the PR review the API was simplified:

```rust
pub fn tokenize(text: &str) -> (Vec<Token>, Vec<SyntaxError>);

// Neither lex function checks for unescape errors
pub fn lex_single_syntax_kind(text: &str) -> Option<(SyntaxKind, Option<SyntaxError>)>;
pub fn lex_single_valid_syntax_kind(text: &str) -> Option<SyntaxKind>;

// This will be removed in the next PR in favour of simplifying `SyntaxError` to `(String, TextRange)`
pub enum TokenizeError { /* Simple enum which reflects rustc_lexer tokenization errors */ }

// This is private, but may be made public if demand arises in the future (least-privilege principle)
fn lex_first_token(text: &str) -> Option<(Token, Option<SyntaxError>)>;
```

Co-authored-by: Veetaha <[email protected]>
2 parents b090ee5 + a3e5663 · commit 918547d
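
For a quick feel for the final API, here is a minimal usage sketch based on the post-review signatures above; the sample inputs and asserted outcomes are illustrative, not tests taken from the PR:

```rust
use ra_syntax::{lex_single_valid_syntax_kind, tokenize, SyntaxKind};

fn main() {
    // `tokenize` now returns all the tokens it could produce *plus* any lexer
    // errors, instead of silently dropping information about invalid input.
    let (tokens, errors) = tokenize("let s = \"unterminated");
    assert!(!tokens.is_empty());
    assert!(!errors.is_empty()); // the unterminated string literal is reported

    // `lex_single_valid_syntax_kind` succeeds only when the whole input lexes
    // to exactly one token with no lexer error.
    assert_eq!(lex_single_valid_syntax_kind("ident"), Some(SyntaxKind::IDENT));
    assert_eq!(lex_single_valid_syntax_kind("two tokens"), None);
}
```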

File tree

160 files changed: +798 −172 lines


crates/ra_ide/src/references/rename.rs

Lines changed: 6 additions & 6 deletions
```diff
@@ -2,7 +2,9 @@
 
 use hir::ModuleSource;
 use ra_db::{RelativePath, RelativePathBuf, SourceDatabase, SourceDatabaseExt};
-use ra_syntax::{algo::find_node_at_offset, ast, tokenize, AstNode, SyntaxKind, SyntaxNode};
+use ra_syntax::{
+    algo::find_node_at_offset, ast, lex_single_valid_syntax_kind, AstNode, SyntaxKind, SyntaxNode,
+};
 use ra_text_edit::TextEdit;
 
 use crate::{
@@ -17,11 +19,9 @@ pub(crate) fn rename(
     position: FilePosition,
     new_name: &str,
 ) -> Option<RangeInfo<SourceChange>> {
-    let tokens = tokenize(new_name);
-    if tokens.len() != 1
-        || (tokens[0].kind != SyntaxKind::IDENT && tokens[0].kind != SyntaxKind::UNDERSCORE)
-    {
-        return None;
+    match lex_single_valid_syntax_kind(new_name)? {
+        SyntaxKind::IDENT | SyntaxKind::UNDERSCORE => (),
+        _ => return None,
     }
 
     let parse = db.parse(position.file_id);
```
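
To make the behavioral change concrete, here is a small sketch of the check the new code performs; `is_valid_rename_target` is a hypothetical helper for illustration, not code from this commit:

```rust
use ra_syntax::{lex_single_valid_syntax_kind, SyntaxKind};

// Mirrors the new check in `rename`: the candidate must lex to exactly one
// valid token, and that token must be an identifier or `_`.
fn is_valid_rename_target(new_name: &str) -> bool {
    match lex_single_valid_syntax_kind(new_name) {
        Some(SyntaxKind::IDENT) | Some(SyntaxKind::UNDERSCORE) => true,
        _ => false,
    }
}

fn main() {
    assert!(is_valid_rename_target("foo")); // a single identifier token
    assert!(is_valid_rename_target("_")); // a single underscore token
    assert!(!is_valid_rename_target("foo bar")); // two tokens -> None
    assert!(!is_valid_rename_target("42")); // one token, but not IDENT/UNDERSCORE
}
```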

crates/ra_mbe/src/subtree_source.rs

Lines changed: 4 additions & 3 deletions
```diff
@@ -1,7 +1,7 @@
 //! FIXME: write short doc here
 
 use ra_parser::{Token, TokenSource};
-use ra_syntax::{classify_literal, SmolStr, SyntaxKind, SyntaxKind::*, T};
+use ra_syntax::{lex_single_valid_syntax_kind, SmolStr, SyntaxKind, SyntaxKind::*, T};
 use std::cell::{Cell, Ref, RefCell};
 use tt::buffer::{Cursor, TokenBuffer};
 
@@ -129,8 +129,9 @@ fn convert_delim(d: Option<tt::DelimiterKind>, closing: bool) -> TtToken {
 }
 
 fn convert_literal(l: &tt::Literal) -> TtToken {
-    let kind =
-        classify_literal(&l.text).map(|tkn| tkn.kind).unwrap_or_else(|| match l.text.as_ref() {
+    let kind = lex_single_valid_syntax_kind(&l.text)
+        .filter(|kind| kind.is_literal())
+        .unwrap_or_else(|| match l.text.as_ref() {
             "true" => T![true],
             "false" => T![false],
             _ => panic!("Fail to convert given literal {:#?}", &l),
```

crates/ra_syntax/src/algo.rs

Lines changed: 1 addition & 1 deletion
```diff
@@ -81,7 +81,7 @@ impl TreeDiff {
 /// Specifically, returns a map whose keys are descendants of `from` and values
 /// are descendants of `to`, such that `replace_descendants(from, map) == to`.
 ///
-/// A trivial solution is a singletom map `{ from: to }`, but this function
+/// A trivial solution is a singleton map `{ from: to }`, but this function
 /// tries to find a more fine-grained diff.
 pub fn diff(from: &SyntaxNode, to: &SyntaxNode) -> TreeDiff {
     let mut buf = FxHashMap::default();
```

crates/ra_syntax/src/lib.rs

Lines changed: 3 additions & 1 deletion
```diff
@@ -41,7 +41,9 @@ use crate::syntax_node::GreenNode;
 pub use crate::{
     algo::InsertPosition,
     ast::{AstNode, AstToken},
-    parsing::{classify_literal, tokenize, Token},
+    parsing::{
+        lex_single_syntax_kind, lex_single_valid_syntax_kind, tokenize, Token, TokenizeError,
+    },
     ptr::{AstPtr, SyntaxNodePtr},
     syntax_error::{Location, SyntaxError, SyntaxErrorKind},
     syntax_node::{
```

crates/ra_syntax/src/parsing.rs

Lines changed: 13 additions & 5 deletions
```diff
@@ -7,15 +7,23 @@ mod text_tree_sink;
 mod reparsing;
 
 use crate::{syntax_node::GreenNode, SyntaxError};
+use text_token_source::TextTokenSource;
+use text_tree_sink::TextTreeSink;
 
-pub use self::lexer::{classify_literal, tokenize, Token};
+pub use lexer::*;
 
 pub(crate) use self::reparsing::incremental_reparse;
 
 pub(crate) fn parse_text(text: &str) -> (GreenNode, Vec<SyntaxError>) {
-    let tokens = tokenize(&text);
-    let mut token_source = text_token_source::TextTokenSource::new(text, &tokens);
-    let mut tree_sink = text_tree_sink::TextTreeSink::new(text, &tokens);
+    let (tokens, lexer_errors) = tokenize(&text);
+
+    let mut token_source = TextTokenSource::new(text, &tokens);
+    let mut tree_sink = TextTreeSink::new(text, &tokens);
+
     ra_parser::parse(&mut token_source, &mut tree_sink);
-    tree_sink.finish()
+
+    let (tree, mut parser_errors) = tree_sink.finish();
+    parser_errors.extend(lexer_errors);
+
+    (tree, parser_errors)
 }
```
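
The net effect of the `parse_text` change is that lexer errors now surface through the same channel as parser errors. A minimal sketch of what a caller might observe, assuming `SourceFile::parse` reaches `parse_text` internally (the sample input is illustrative):

```rust
use ra_syntax::SourceFile;

fn main() {
    // The unterminated string is a *lexer* error; before this commit it would
    // not have appeared in the parse result's error list.
    let parse = SourceFile::parse("fn main() { let s = \"unterminated; }");
    assert!(!parse.errors().is_empty());
}
```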
