Skip to content

Commit d811999

Browse files
committed
Expose parsed source code comments
1 parent 4beea9a commit d811999

File tree

4 files changed

+335
-7
lines changed

4 files changed

+335
-7
lines changed

src/ast/comments.rs

Lines changed: 280 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,280 @@
1+
// Licensed under the Apache License, Version 2.0 (the "License");
2+
// you may not use this file except in compliance with the License.
3+
// You may obtain a copy of the License at
4+
//
5+
// http://www.apache.org/licenses/LICENSE-2.0
6+
//
7+
// Unless required by applicable law or agreed to in writing, software
8+
// distributed under the License is distributed on an "AS IS" BASIS,
9+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10+
// See the License for the specific language governing permissions and
11+
// limitations under the License.
12+
13+
//! Provides a representation of source code comments in parsed SQL code.
14+
15+
#[cfg(not(feature = "std"))]
16+
use alloc::{string::String, vec::Vec};
17+
18+
use core::{
19+
ops::{Bound, Deref, RangeBounds},
20+
slice,
21+
};
22+
23+
use crate::tokenizer::{Location, Span};
24+
25+
/// An opaque container for comments from a parse SQL source code.
26+
#[derive(Default, Debug)]
27+
pub struct Comments(Vec<CommentWithSpan>);
28+
29+
impl Comments {
30+
pub(crate) fn push(&mut self, comment: CommentWithSpan) {
31+
debug_assert!(self
32+
.0
33+
.last()
34+
.map(|last| last.span < comment.span)
35+
.unwrap_or(true));
36+
self.0.push(comment);
37+
}
38+
39+
/// Finds comments starting within the given location range. The order of
40+
/// iterator reflects the order of the comments as encountered in the parsed
41+
/// source code.
42+
pub fn find<R: RangeBounds<Location>>(&self, range: R) -> Iter<'_> {
43+
let (start, end) = (
44+
self.start_index(range.start_bound()),
45+
self.end_index(range.end_bound()),
46+
);
47+
// ~ in case the user specified a rever range
48+
Iter(if start <= end {
49+
self.0[start..end].iter()
50+
} else {
51+
self.0[0..0].iter()
52+
})
53+
}
54+
55+
/// Find the index of the first comment starting "before" the given location.
56+
///
57+
/// The returned index is _inclusive._
58+
fn start_index(&self, location: Bound<&Location>) -> usize {
59+
match location {
60+
Bound::Included(location) => {
61+
match self.0.binary_search_by(|c| c.span.start.cmp(location)) {
62+
Ok(i) => i,
63+
Err(i) => i,
64+
}
65+
}
66+
Bound::Excluded(location) => {
67+
match self.0.binary_search_by(|c| c.span.start.cmp(location)) {
68+
Ok(i) => i + 1,
69+
Err(i) => i,
70+
}
71+
}
72+
Bound::Unbounded => 0,
73+
}
74+
}
75+
76+
/// Find the index of the first comment starting "after" the given location.
77+
///
78+
/// The returned index is _exclusive._
79+
fn end_index(&self, location: Bound<&Location>) -> usize {
80+
match location {
81+
Bound::Included(location) => {
82+
match self.0.binary_search_by(|c| c.span.start.cmp(location)) {
83+
Ok(i) => i + 1,
84+
Err(i) => i,
85+
}
86+
}
87+
Bound::Excluded(location) => {
88+
match self.0.binary_search_by(|c| c.span.start.cmp(location)) {
89+
Ok(i) => i,
90+
Err(i) => i,
91+
}
92+
}
93+
Bound::Unbounded => self.0.len(),
94+
}
95+
}
96+
}
97+
98+
impl From<Comments> for Vec<CommentWithSpan> {
99+
fn from(comments: Comments) -> Self {
100+
comments.0
101+
}
102+
}
103+
104+
/// A source code comment with information of its entire span.
105+
#[derive(Debug, Clone)]
106+
pub struct CommentWithSpan {
107+
/// The source code comment iself
108+
pub comment: Comment,
109+
/// The span of the comment including its markers
110+
pub span: Span,
111+
}
112+
113+
impl Deref for CommentWithSpan {
114+
type Target = Comment;
115+
116+
fn deref(&self) -> &Self::Target {
117+
&self.comment
118+
}
119+
}
120+
121+
/// A unified type of the different source code comment formats.
///
/// Dereferences to `str`, exposing the same text as [`Comment::as_str`].
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum Comment {
    /// A single line comment, typically introduced with a prefix and spanning
    /// until end-of-line or end-of-file in the source code.
    ///
    /// Note: `content` will include the terminating new-line character, if any.
    SingleLine {
        /// The text of the comment following the prefix.
        content: String,
        /// The prefix that introduced the comment, e.g. `--`.
        prefix: String,
    },

    /// A multi-line comment, typically enclosed in `/* .. */` markers. The
    /// string represents the content excluding the markers.
    MultiLine(String),
}

impl Comment {
    /// Retrieves the content of the comment as string slice.
    ///
    /// For single line comments this is the `content` only; the `prefix` is
    /// not part of the returned slice.
    pub fn as_str(&self) -> &str {
        match self {
            Comment::SingleLine { content, .. } | Comment::MultiLine(content) => content,
        }
    }
}

impl Deref for Comment {
    type Target = str;

    fn deref(&self) -> &Self::Target {
        self.as_str()
    }
}
152+
153+
/// An opaque iterator implementation over comments served by [Comments::find].
154+
pub struct Iter<'a>(slice::Iter<'a, CommentWithSpan>);
155+
156+
impl<'a> Iterator for Iter<'a> {
157+
type Item = &'a CommentWithSpan;
158+
159+
fn next(&mut self) -> Option<Self::Item> {
160+
self.0.next()
161+
}
162+
}
163+
164+
#[cfg(test)]
mod tests {
    use super::*;

    /// Exercises [`Comments::find`] with end-only, start-only, two-sided,
    /// reversed, and unbounded ranges over a fixed set of four comments.
    #[test]
    fn test_find() {
        let comments = {
            // The spans below model comments laid out like this:
            //
            // ```
            // -- abc
            // /* hello */--, world
            // /* def
            // ghi
            // jkl
            // */
            // ```
            let mut c = Comments(Vec::new());
            c.push(CommentWithSpan {
                comment: Comment::SingleLine {
                    content: " abc".into(),
                    prefix: "--".into(),
                },
                span: Span::new((1, 1).into(), (1, 7).into()),
            });
            c.push(CommentWithSpan {
                comment: Comment::MultiLine(" hello ".into()),
                span: Span::new((2, 3).into(), (2, 14).into()),
            });
            c.push(CommentWithSpan {
                comment: Comment::SingleLine {
                    content: ", world".into(),
                    prefix: "--".into(),
                },
                span: Span::new((2, 14).into(), (2, 21).into()),
            });
            c.push(CommentWithSpan {
                comment: Comment::MultiLine(" def\n ghi\n jkl\n".into()),
                span: Span::new((3, 3).into(), (7, 1).into()),
            });
            c
        };

        // Collects the content of all comments found in `range`.
        fn find<R: RangeBounds<Location>>(comments: &Comments, range: R) -> Vec<&str> {
            comments.find(range).map(|c| c.as_str()).collect::<Vec<_>>()
        }

        // ~ end-points only --------------------------------------------------
        assert_eq!(find(&comments, ..Location::new(0, 0)), Vec::<&str>::new());
        assert_eq!(find(&comments, ..Location::new(2, 1)), vec![" abc"]);
        assert_eq!(find(&comments, ..Location::new(2, 3)), vec![" abc"]);
        assert_eq!(
            find(&comments, ..=Location::new(2, 3)),
            vec![" abc", " hello "]
        );
        assert_eq!(
            find(&comments, ..Location::new(2, 15)),
            vec![" abc", " hello ", ", world"]
        );

        // ~ start-points only ------------------------------------------------
        assert_eq!(
            find(&comments, Location::new(1000, 1000)..),
            Vec::<&str>::new()
        );
        assert_eq!(
            find(&comments, Location::new(2, 14)..),
            vec![", world", " def\n ghi\n jkl\n"]
        );
        assert_eq!(
            find(&comments, Location::new(2, 15)..),
            vec![" def\n ghi\n jkl\n"]
        );
        assert_eq!(
            find(&comments, Location::new(0, 0)..),
            vec![" abc", " hello ", ", world", " def\n ghi\n jkl\n"]
        );
        assert_eq!(
            find(&comments, Location::new(1, 1)..),
            vec![" abc", " hello ", ", world", " def\n ghi\n jkl\n"]
        );

        // ~ ranges -----------------------------------------------------------
        // a reversed range finds nothing
        assert_eq!(
            find(&comments, Location::new(2, 1)..Location::new(1, 1)),
            Vec::<&str>::new()
        );
        assert_eq!(
            find(&comments, Location::new(1, 1)..Location::new(2, 3)),
            vec![" abc"]
        );
        assert_eq!(
            find(&comments, Location::new(1, 1)..=Location::new(2, 3)),
            vec![" abc", " hello "]
        );
        assert_eq!(
            find(&comments, Location::new(1, 1)..=Location::new(2, 10)),
            vec![" abc", " hello "]
        );
        assert_eq!(
            find(&comments, Location::new(1, 1)..=Location::new(2, 14)),
            vec![" abc", " hello ", ", world"]
        );
        assert_eq!(
            find(&comments, Location::new(1, 1)..Location::new(2, 15)),
            vec![" abc", " hello ", ", world"]
        );

        // ~ find everything --------------------------------------------------
        assert_eq!(
            find(&comments, ..),
            vec![" abc", " hello ", ", world", " def\n ghi\n jkl\n"]
        );
    }
}

src/ast/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,7 @@ mod query;
133133
mod spans;
134134
pub use spans::Spanned;
135135

136+
pub mod comments;
136137
mod trigger;
137138
mod value;
138139

src/ast/spans.rs

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ use core::iter;
2828
use crate::tokenizer::Span;
2929

3030
use super::{
31-
dcl::SecondaryRoles, value::ValueWithSpan, AccessExpr, AlterColumnOperation,
31+
comments, dcl::SecondaryRoles, value::ValueWithSpan, AccessExpr, AlterColumnOperation,
3232
AlterIndexOperation, AlterTableOperation, Analyze, Array, Assignment, AssignmentTarget,
3333
AttachedToken, BeginEndStatements, CaseStatement, CloseCursor, ClusteredIndex, ColumnDef,
3434
ColumnOption, ColumnOptionDef, ConditionalStatementBlock, ConditionalStatements,
@@ -2468,6 +2468,12 @@ impl Spanned for OutputClause {
24682468
}
24692469
}
24702470

2471+
impl Spanned for comments::CommentWithSpan {
2472+
fn span(&self) -> Span {
2473+
self.span
2474+
}
2475+
}
2476+
24712477
#[cfg(test)]
24722478
pub mod tests {
24732479
use crate::dialect::{Dialect, GenericDialect, SnowflakeDialect};

src/parser/mod.rs

Lines changed: 47 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -32,14 +32,17 @@ use recursion::RecursionCounter;
3232
use IsLateral::*;
3333
use IsOptional::*;
3434

35-
use crate::ast::helpers::{
36-
key_value_options::{
37-
KeyValueOption, KeyValueOptionKind, KeyValueOptions, KeyValueOptionsDelimiter,
38-
},
39-
stmt_create_table::{CreateTableBuilder, CreateTableConfiguration},
40-
};
4135
use crate::ast::Statement::CreatePolicy;
4236
use crate::ast::*;
37+
use crate::ast::{
38+
comments,
39+
helpers::{
40+
key_value_options::{
41+
KeyValueOption, KeyValueOptionKind, KeyValueOptions, KeyValueOptionsDelimiter,
42+
},
43+
stmt_create_table::{CreateTableBuilder, CreateTableConfiguration},
44+
},
45+
};
4346
use crate::dialect::*;
4447
use crate::keywords::{Keyword, ALL_KEYWORDS};
4548
use crate::tokenizer::*;
@@ -529,6 +532,44 @@ impl<'a> Parser<'a> {
529532
Parser::new(dialect).try_with_sql(sql)?.parse_statements()
530533
}
531534

535+
/// Parses the given `sql` into an Abstract Syntax Tree (AST), returning
536+
/// also encountered source code comments.
537+
///
538+
/// See [Parser::parse_sql].
539+
pub fn parse_sql_with_comments(
540+
dialect: &'a dyn Dialect,
541+
sql: &str,
542+
) -> Result<(Vec<Statement>, comments::Comments), ParserError> {
543+
let mut p = Parser::new(dialect).try_with_sql(sql)?;
544+
p.parse_statements().map(|stmts| (stmts, p.into_comments()))
545+
}
546+
547+
/// Consumes this parser returning comments from the parsed token stream.
548+
pub fn into_comments(self) -> comments::Comments {
549+
let mut comments = comments::Comments::default();
550+
for t in self.tokens.into_iter() {
551+
match t.token {
552+
Token::Whitespace(Whitespace::SingleLineComment { comment, prefix }) => {
553+
comments.push(comments::CommentWithSpan {
554+
comment: comments::Comment::SingleLine {
555+
content: comment,
556+
prefix,
557+
},
558+
span: t.span,
559+
});
560+
}
561+
Token::Whitespace(Whitespace::MultiLineComment(comment)) => {
562+
comments.push(comments::CommentWithSpan {
563+
comment: comments::Comment::MultiLine(comment),
564+
span: t.span,
565+
});
566+
}
567+
_ => {}
568+
}
569+
}
570+
comments
571+
}
572+
532573
/// Parse a single top-level statement (such as SELECT, INSERT, CREATE, etc.),
533574
/// stopping before the statement separator, if any.
534575
pub fn parse_statement(&mut self) -> Result<Statement, ParserError> {

0 commit comments

Comments
 (0)