//! Lexical token definitions for a Prisma-like schema scanner.
//!
//! This module declares token kinds, source coordinates, spans, and the `Token`
//! container emitted by the lexer.
//!
//! # Model
//! - `TokenType` enumerates the discrete kinds recognized by the scanner.
//!   Variants carrying `String` retain the associated source text.
//! - `SymbolLocation` records a single position as `(line, column)`.
//! - `SymbolSpan` records a contiguous region from `start` to `end` in the
//!   scanner's coordinate system.
//! - `Token` pairs a `TokenType` with a `SymbolSpan`.
//!
//! # Coordinates and spans
//! Line and column units are implementation-defined and measured in the
//! lexer's coordinate system. No guarantee is made about the inclusivity of
//! the `end` bound; consumers should treat spans as opaque bounds reported by
//! the lexer. The only invariant is that `start` does not follow `end` in the
//! lexer's ordering.
//!
//! # Text payloads
//! For variants that carry `String`, delimiter retention (for example, whether
//! quotes or comment markers are included) is an implementation detail of the
//! lexer and may vary across modes or inputs.
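//!
//! # Examples
//!
//! A minimal sketch of constructing and inspecting a token. The coordinates are
//! arbitrary, and the `your_crate::token` import path is a placeholder for
//! wherever this module is mounted in your crate:
//!
//! ```ignore
//! use your_crate::token::{Token, TokenType};
//!
//! // A hypothetical identifier token starting at line 3, column 7 and ending at column 10.
//! let token = Token::new(TokenType::Identifier("User".to_string()), (3, 7), (3, 10));
//!
//! assert_eq!(token.r#type(), &TokenType::Identifier("User".to_string()));
//! assert_eq!(token.span().start.line, 3);
//! assert_eq!(token.span().end.column, 10);
//! ```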

/// Lexical token kinds recognized by the scanner.
///
/// Each variant represents a distinct syntactic unit emitted by the lexer.
/// Variants carrying `String` store the source text associated with the token.
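///
/// # Examples
///
/// A sketch of recovering the text payload from the variants that carry one;
/// the `text_of` helper is illustrative and not part of this module:
///
/// ```ignore
/// fn text_of(kind: &TokenType) -> Option<&str> {
///     match kind {
///         TokenType::Literal(text)
///         | TokenType::Identifier(text)
///         | TokenType::Comment(text)
///         | TokenType::DocComment(text)
///         | TokenType::Unsupported(text) => Some(text.as_str()),
///         _ => None,
///     }
/// }
/// ```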
#[derive(Debug, PartialEq, Clone)]
pub enum TokenType {
    // Keywords
    /// The `generator` keyword.
    Generator,
    /// The `datasource` keyword.
    DataSource,
    /// The `model` keyword.
    Model,
    /// The `enum` keyword.
    Enum,
    /// The `type` keyword.
    Type,

    // Types
    /// The `String` type keyword.
    String,
    /// The `Int` type keyword.
    Int,
    /// The `Float` type keyword.
    Float,
    /// The `Boolean` type keyword.
    Boolean,
    /// The `DateTime` type keyword.
    DateTime,
    /// The `Json` type keyword.
    Json,
    /// The `Bytes` type keyword.
    Bytes,
    /// The `Decimal` type keyword.
    Decimal,

    // Literals
    /// A literal value with its source text.
    Literal(String),
    /// An identifier with its source text.
    Identifier(String),

    // Operators
    /// The assignment operator.
    Assign,
    /// The optional marker.
    Optional,
    /// The list-type marker.
    List,
    /// The dot operator.
    Dot,

    // Punctuation
    /// The left brace `{`.
    LeftBrace,
    /// The right brace `}`.
    RightBrace,
    /// The left bracket `[`.
    LeftBracket,
    /// The right bracket `]`.
    RightBracket,
    /// The left parenthesis `(`.
    LeftParen,
    /// The right parenthesis `)`.
    RightParen,
    /// The comma `,`.
    Comma,
    /// The colon `:`.
    Colon,
    /// The at sign `@`.
    At,
    /// The double at sign `@@`.
    DoubleAt,

    // Comments
    /// A comment with its text content.
    Comment(String),
    /// A documentation comment with its text content.
    DocComment(String),

    // Unsupported
    /// A token not recognized by any of the kinds above; carries its source text.
    Unsupported(String),

    // End of File
    /// End-of-input marker emitted after the final token.
    EOF,
}

/// A position in the source text.
///
/// Positions are expressed as line and column numbers measured in the lexer's
/// coordinate system.
#[derive(Debug, Clone)]
pub struct SymbolLocation {
    /// Line number of the position.
    pub line: u32,
    /// Column number of the position.
    pub column: u32,
}

/// A contiguous range in the source text.
///
/// The span is bounded by a start and end location as recorded by the lexer.
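///
/// # Examples
///
/// A sketch of rendering a span for a diagnostic message; the formatting shown
/// here is illustrative, not something this module prescribes:
///
/// ```ignore
/// fn render(span: &SymbolSpan) -> String {
///     format!(
///         "{}:{}-{}:{}",
///         span.start.line, span.start.column, span.end.line, span.end.column
///     )
/// }
/// ```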
#[derive(Debug, Clone)]
pub struct SymbolSpan {
    /// The start location of the span.
    pub start: SymbolLocation,
    /// The end location of the span.
    pub end: SymbolLocation,
}

/// A lexical token with its kind and source span.
#[derive(Debug, Clone)]
pub struct Token {
    r#type: TokenType,
    span: SymbolSpan,
}

impl Token {
    /// Constructs a new `Token` from a kind and start/end coordinates.
    ///
    /// # Parameters
    ///
    /// * `r#type` - The token kind.
    /// * `start` - The `(line, column)` of the token start.
    /// * `end` - The `(line, column)` of the token end.
    ///
    /// # Panics
    ///
    /// Panics if `start` comes after `end` in `(line, column)` order, that is,
    /// when `start.0 > end.0`, or when `start.0 == end.0` and `start.1 > end.1`.
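    ///
    /// # Examples
    ///
    /// A sketch of a span that crosses a line boundary; it is accepted because
    /// the check compares `(line, column)` pairs lexicographically rather than
    /// per component (the token kind and coordinates are illustrative):
    ///
    /// ```ignore
    /// let token = Token::new(TokenType::Comment("a note".to_string()), (2, 10), (3, 4));
    /// assert_eq!(token.span().end.line, 3);
    /// ```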
    #[must_use]
    pub fn new(r#type: TokenType, start: (u32, u32), end: (u32, u32)) -> Self {
        assert!(
            start <= end,
            "token start must not come after its end in (line, column) order"
        );
        Self {
            r#type,
            span: SymbolSpan {
                start: SymbolLocation {
                    line: start.0,
                    column: start.1,
                },
                end: SymbolLocation {
                    line: end.0,
                    column: end.1,
                },
            },
        }
    }

    /// Returns the token kind.
    #[must_use]
    pub fn r#type(&self) -> &TokenType {
        &self.r#type
    }

    /// Returns the span covered by the token.
    #[must_use]
    pub fn span(&self) -> &SymbolSpan {
        &self.span
    }
}