Skip to content

Commit b1fd711

Browse files
authored
Merge pull request #7 from mkpro118/define-tokens
Add token definitions for prisma parser module
2 parents d54e940 + a078a05 commit b1fd711

File tree

4 files changed

+191
-0
lines changed

4 files changed

+191
-0
lines changed

src/core/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
pub mod parser;

src/core/parser/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
pub mod tokens;

src/core/parser/tokens.rs

Lines changed: 187 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,187 @@
1+
//! Lexical token definitions for a Prisma-like schema scanner.
2+
//!
3+
//! This module declares token kinds, source coordinates, spans, and the `Token`
4+
//! container emitted by the lexer.
5+
//!
6+
//! # Model
7+
//! - `TokenType` enumerates the discrete kinds recognized by the scanner.
8+
//! Variants carrying `String` retain the associated source text.
9+
//! - `SymbolLocation` records a single position as `(line, column)`.
10+
//! - `SymbolSpan` records a contiguous region `[start, end]` in the scanner's
11+
//! coordinate system.
12+
//! - `Token` pairs a `TokenType` with a `SymbolSpan`.
13+
//!
14+
//! # Coordinates and spans
15+
//! Line and column units are implementation-defined and measured in the
16+
//! lexer's coordinate system. No guarantee is made about the inclusivity of
17+
//! the `end` bound; consumers should treat spans as opaque bounds reported by
18+
//! the lexer. The only invariant is that `start` does not follow `end` in the
19+
//! lexer's ordering.
20+
//!
21+
//! # Text payloads
22+
//! For variants that carry `String`, delimiter retention (for example, whether
23+
//! quotes or comment markers are included) is an implementation detail of the
24+
//! lexer and may vary across modes or inputs.
25+
26+
/// Lexical token kinds recognized by the scanner.
///
/// Each variant represents a distinct syntactic unit emitted by the lexer.
/// Variants carrying `String` store the source text associated with the token.
//
// `Eq` and `Hash` are derivable because every payload is `String`; they allow
// tokens to be used in hash-based collections and full-equivalence checks.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum TokenType {
    // Keywords
    /// The `generator` keyword.
    Generator,
    /// The `datasource` keyword.
    DataSource,
    /// The `model` keyword.
    Model,
    /// The `enum` keyword.
    Enum,
    /// The `type` keyword.
    Type,

    // Types
    /// The `String` type keyword.
    String,
    /// The `Int` type keyword.
    Int,
    /// The `Float` type keyword.
    Float,
    /// The `Boolean` type keyword.
    Boolean,
    /// The `DateTime` type keyword.
    DateTime,
    /// The `Json` type keyword.
    Json,
    /// The `Bytes` type keyword.
    Bytes,
    /// The `Decimal` type keyword.
    Decimal,

    // Literals
    /// A literal value with its source text.
    Literal(String),
    /// An identifier with its source text.
    Identifier(String),

    // Operators
    /// The assignment operator.
    Assign,
    /// The optional marker.
    Optional,
    /// The list-type marker.
    List,
    /// The dot operator.
    Dot,

    // Punctuation
    /// The left brace `{`.
    LeftBrace,
    /// The right brace `}`.
    RightBrace,
    /// The left bracket `[`.
    LeftBracket,
    /// The right bracket `]`.
    RightBracket,
    /// The left parenthesis `(`.
    LeftParen,
    /// The right parenthesis `)`.
    RightParen,
    /// The comma `,`.
    Comma,
    /// The colon `:`.
    Colon,
    /// The at sign `@`.
    At,
    /// The double at sign `@@`.
    DoubleAt,

    // Comments
    /// A comment with its text content.
    Comment(String),
    /// A documentation comment with its text content.
    DocComment(String),

    // Unsupported
    /// A token not recognized as any of the kinds above, carrying the
    /// offending source text.
    Unsupported(String),

    // End of File
    /// End-of-input marker emitted after the final token.
    EOF,
}
114+
115+
/// A position in the source text.
///
/// Positions are expressed as line and column numbers measured in the lexer's
/// coordinate system.
//
// Two `u32` fields: plain old data, so `Copy` is free; `PartialEq`/`Eq`/`Hash`
// make positions comparable and usable as map/set keys, consistent with
// `TokenType` being comparable.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct SymbolLocation {
    /// Line number of the position.
    pub line: u32,
    /// Column number of the position.
    pub column: u32,
}

/// A contiguous range in the source text.
///
/// The span is bounded by a start and end location as recorded by the lexer.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct SymbolSpan {
    /// The start location of the span.
    pub start: SymbolLocation,
    /// The end location of the span.
    pub end: SymbolLocation,
}
137+
138+
/// A lexical token with its kind and source span.
139+
#[derive(Debug, Clone)]
140+
pub struct Token {
141+
r#type: TokenType,
142+
span: SymbolSpan,
143+
}
144+
145+
impl Token {
146+
/// Constructs a new `Token` from a kind and start/end coordinates.
147+
///
148+
/// # Parameters
149+
///
150+
/// * `r#type` — The token kind.
151+
/// * `start` — The `(line, column)` of the token start.
152+
/// * `end` — The `(line, column)` of the token end.
153+
///
154+
/// # Panics
155+
///
156+
/// Panics if `start` does not precede or equal `end` component-wise.
157+
/// Specifically, this panics when `start.0 > end.0` or `start.1 > end.1`.
158+
#[must_use]
159+
pub fn new(r#type: TokenType, start: (u32, u32), end: (u32, u32)) -> Self {
160+
assert!(start.0 <= end.0 && start.1 <= end.1);
161+
Self {
162+
r#type,
163+
span: SymbolSpan {
164+
start: SymbolLocation {
165+
line: start.0,
166+
column: start.1,
167+
},
168+
end: SymbolLocation {
169+
line: end.0,
170+
column: end.1,
171+
},
172+
},
173+
}
174+
}
175+
176+
/// Returns the token kind.
177+
#[must_use]
178+
pub fn r#type(&self) -> &TokenType {
179+
&self.r#type
180+
}
181+
182+
/// Returns the span covered by the token.
183+
#[must_use]
184+
pub fn span(&self) -> &SymbolSpan {
185+
&self.span
186+
}
187+
}

src/lib.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,3 +9,5 @@
99
#![forbid(clippy::perf)]
1010
#![forbid(clippy::suspicious)]
1111
#![forbid(future_incompatible)]
12+
13+
pub mod core;

0 commit comments

Comments
 (0)