Skip to content

Commit d319cf3

Browse files
authored
v2: add lexer (#431)
Part of the v2 implementation. Going to prefix the crates with `squawk_` even though they aren't in the other repo. This should make it work with the existing crates which don't have a prefix.
1 parent e05c060 commit d319cf3

28 files changed

+1305
-4
lines changed

Cargo.lock

Lines changed: 7 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,12 @@
11
[workspace]
22
members = ["crates/*"]
3+
resolver = "2"
4+
5+
[workspace.package]
6+
edition = "2021"
7+
rust-version = "1.81.0"
8+
authors = ["Squawk Team & Contributors"]
9+
license = "GPL-3.0"
310

411
[workspace.dependencies]
512
# third party
@@ -27,5 +34,15 @@ squawk-parser = { version = "0.0.0", path = "./crates/parser" }
2734
squawk-linter = { version = "0.0.0", path = "./crates/linter" }
2835
squawk-github = { version = "0.0.0", path = "./crates/github" }
2936

37+
[workspace.lints.clippy]
38+
collapsible_else_if = "allow"
39+
collapsible_if = "allow"
40+
needless_return = "allow"
41+
doc_markdown = "deny"
42+
manual_let_else = "deny"
43+
44+
[profile.dev]
45+
debug = 0
46+
3047
[profile.dev.package]
3148
insta.opt-level = 3

crates/linter/Cargo.toml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
[package]
22
name = "squawk-linter"
33
version = "0.0.0"
4-
authors = ["Steve Dignam <[email protected]>"]
5-
edition = "2018"
6-
license = "GPL-3.0"
4+
authors.workspace = true
5+
edition.workspace = true
6+
license.workspace = true
77
description = "Postgres SQL linter used in squawk"
88
repository = "https://github.com/sbdchd/squawk"
99
readme = "README.md"

crates/squawk_lexer/Cargo.toml

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
[package]
2+
name = "lexer"
3+
version = "0.0.0"
4+
description = "TBD"
5+
6+
authors.workspace = true
7+
edition.workspace = true
8+
license = "MIT"
9+
rust-version.workspace = true
10+
11+
[lib]
12+
doctest = false
13+
14+
[dependencies]
15+
16+
[dev-dependencies]
17+
insta.workspace = true
18+
19+
[lints]
20+
workspace = true

crates/squawk_lexer/README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# lexer
2+
3+
> Adapted from the Rust lexer.
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
from: https://github.com/rust-lang/rust/blob/176e5452095444815207be02c16de0b1487a1b53/LICENSE-MIT
2+
3+
Permission is hereby granted, free of charge, to any
4+
person obtaining a copy of this software and associated
5+
documentation files (the "Software"), to deal in the
6+
Software without restriction, including without
7+
limitation the rights to use, copy, modify, merge,
8+
publish, distribute, sublicense, and/or sell copies of
9+
the Software, and to permit persons to whom the Software
10+
is furnished to do so, subject to the following
11+
conditions:
12+
13+
The above copyright notice and this permission notice
14+
shall be included in all copies or substantial portions
15+
of the Software.
16+
17+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
18+
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
19+
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
20+
PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
21+
SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
22+
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
24+
IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
25+
DEALINGS IN THE SOFTWARE.

crates/squawk_lexer/src/cursor.rs

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
use std::str::Chars;
2+
3+
/// Peekable iterator over a char sequence.
///
/// The next character can be peeked via the `first` method,
/// and the position can be shifted forward via the `bump` method.
///
/// Based on:
/// - <https://github.com/rust-lang/rust/blob/d1b7355d3d7b4ead564dbecb1d240fcc74fff21b/compiler/rustc_lexer/src/cursor.rs>
/// - <https://github.com/astral-sh/ruff/blob/d1079680bb29f6b797b5df15327195300f635f3c/crates/ruff_python_parser/src/lexer/cursor.rs>
pub(crate) struct Cursor<'a> {
    /// Iterator over chars. Slightly faster than a &str.
    chars: Chars<'a>,
    /// Byte length of the unread input as of construction or the most recent
    /// `reset_pos_within_token` call.
    len_remaining: usize,
}

/// Sentinel returned by `Cursor::first` when there is no character left to peek.
pub(crate) const EOF_CHAR: char = '\0';

impl<'a> Cursor<'a> {
    /// Creates a cursor positioned at the start of `input`.
    pub(crate) fn new(input: &'a str) -> Cursor<'a> {
        Cursor {
            len_remaining: input.len(),
            chars: input.chars(),
        }
    }

    /// Peeks the next symbol from the input stream without consuming it.
    /// If the requested position doesn't exist, `EOF_CHAR` is returned.
    /// However, getting `EOF_CHAR` doesn't always mean actual end of file
    /// (the input itself may contain a NUL character); check with `is_eof`.
    pub(crate) fn first(&self) -> char {
        // `.next()` optimizes better than `.nth(0)`
        self.chars.clone().next().unwrap_or(EOF_CHAR)
    }

    /// Checks if there is nothing more to consume.
    pub(crate) fn is_eof(&self) -> bool {
        self.chars.as_str().is_empty()
    }

    /// Returns the number of bytes (not chars) consumed since the cursor was
    /// created or since the last call to `reset_pos_within_token`.
    pub(crate) fn pos_within_token(&self) -> u32 {
        // NOTE(review): `as u32` truncates silently if more than `u32::MAX`
        // bytes are consumed between resets; presumably inputs never get
        // that large - confirm against callers.
        (self.len_remaining - self.chars.as_str().len()) as u32
    }

    /// Resets the number of bytes consumed to 0.
    pub(crate) fn reset_pos_within_token(&mut self) {
        self.len_remaining = self.chars.as_str().len();
    }

    /// Moves to the next character, returning it, or `None` at end of input.
    pub(crate) fn bump(&mut self) -> Option<char> {
        self.chars.next()
    }

    /// Eats symbols while predicate returns true or until the end of file is reached.
    ///
    /// The predicate is only invoked for characters that are actually present
    /// in the input; it is never called with the synthetic `EOF_CHAR`.
    pub(crate) fn eat_while(&mut self, mut predicate: impl FnMut(char) -> bool) {
        // It was tried making optimized version of this for eg. line comments, but
        // LLVM can inline all of this and compile it down to fast iteration over bytes.
        //
        // The EOF check comes first so a stateful predicate does not observe a
        // spurious `EOF_CHAR` once the input is exhausted.
        while !self.is_eof() && predicate(self.first()) {
            self.bump();
        }
    }
}

0 commit comments

Comments
 (0)