Skip to content

Commit 23d263c

Browse files
committed
Pass spans from parser into AST
1 parent 09e9077 commit 23d263c

File tree

15 files changed

+309
-142
lines changed

15 files changed

+309
-142
lines changed

edb/_edgeql_parser.pyi

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,8 @@ class CSTNode:
5959
class Production:
6060
id: int
6161
args: list[CSTNode]
62+
start: int | None
63+
end: int | None
6264

6365
class Terminal:
6466
text: str

edb/common/parsing.py

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,7 @@ def decorator(func: Any):
9999

100100

101101
class Nonterm(parsing.Nonterm):
102+
span: Span
102103

103104
def __init_subclass__(cls, *, is_internal=False, **kwargs):
104105
"""Add docstrings to class and reduce functions
@@ -125,7 +126,6 @@ def __init_subclass__(cls, *, is_internal=False, **kwargs):
125126
for name, attr in cls.__dict__.items():
126127
if (name.startswith('reduce_') and
127128
isinstance(attr, types.FunctionType)):
128-
inline_index = getattr(attr, 'inline_index', None)
129129

130130
if attr.__doc__ is None:
131131
tokens = name.split('_')
@@ -138,14 +138,11 @@ def __init_subclass__(cls, *, is_internal=False, **kwargs):
138138
if prec is not None:
139139
doc += ' [{}]'.format(prec)
140140

141+
inline_index = getattr(attr, 'inline_index', None)
141142
attr = lambda self, *args, meth=attr: meth(self, *args)
142143
attr.__doc__ = doc
143-
144-
a = span.wrap_function_to_infer_spans(attr)
145-
146-
a.__doc__ = attr.__doc__
147-
a.inline_index = inline_index
148-
setattr(cls, name, a)
144+
attr.inline_index = inline_index
145+
setattr(cls, name, attr)
149146

150147

151148
class ListNonterm(Nonterm, is_internal=True):

edb/common/span.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -298,7 +298,9 @@ def generic_visit(self, node):
298298
class SpanValidator(ast.NodeVisitor):
299299
def generic_visit(self, node):
300300
if getattr(node, 'span', None) is None:
301-
raise RuntimeError('node {} has no span'.format(node))
301+
from edb.edgeql import ast as qlast
302+
if not isinstance(node, (qlast.ShapeOperation, qlast.Options)):
303+
raise RuntimeError('node {} has no span'.format(node))
302304
super().generic_visit(node)
303305

304306

edb/edgeql-parser/edgeql-parser-python/src/parser.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,10 @@ pub struct Production {
6969
id: usize,
7070
#[pyo3(get)]
7171
args: PyObject,
72+
#[pyo3(get)]
73+
start: Option<u64>,
74+
#[pyo3(get)]
75+
end: Option<u64>,
7276
}
7377

7478
#[pyclass]
@@ -195,6 +199,8 @@ impl<'py> IntoPyObject<'py> for ParserCSTNode<'_> {
195199
Production {
196200
id: prod.id,
197201
args: PyList::new(py, prod.args.iter().map(ParserCSTNode))?.into(),
202+
start: prod.span.map(|s| s.start),
203+
end: prod.span.map(|s| s.end),
198204
},
199205
)?),
200206
terminal: None,

edb/edgeql-parser/src/parser/cst.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ pub struct Terminal {
2929
pub struct Production<'a> {
3030
pub id: usize,
3131
pub args: &'a [CSTNode<'a>],
32+
pub span: Option<Span>,
3233

3334
/// When a production is inlined, its id is saved into the new production
3435
/// This is needed when matching CST nodes by production id.

edb/edgeql-parser/src/parser/mod.rs

Lines changed: 36 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -394,6 +394,7 @@ impl<'s> Parser<'s> {
394394

395395
let value = CSTNode::Production(Production {
396396
id: reduce.production_id,
397+
span: get_span_of_nodes(args),
397398
args,
398399
inlined_ids: None,
399400
});
@@ -419,7 +420,7 @@ impl<'s> Parser<'s> {
419420
Some(ctx.alloc_slice_and_push(&new_prod.inlined_ids, inlined_id));
420421
}
421422

422-
extend_span(&mut value, span, ctx);
423+
expand_span(&mut value, span, ctx);
423424
} else {
424425
// place back
425426
value = CSTNode::Production(production);
@@ -607,38 +608,54 @@ impl<'a> StackNode<'a> {
607608
}
608609
}
609610

610-
fn get_span_of_nodes(args: &[CSTNode]) -> Option<Span> {
611-
let start = args.iter().find_map(|x| match x {
611+
/// Returns the span covering syntactically ordered nodes, or `None` if no node has a span. Panics if a `CSTNode::Empty` node is encountered.
612+
fn get_span_of_nodes(nodes: &[CSTNode]) -> Option<Span> {
613+
let start = nodes.iter().find_map(|x| match x {
612614
CSTNode::Terminal(t) => Some(t.span.start),
613-
CSTNode::Production(p) => get_span_of_nodes(p.args).map(|x| x.start),
614-
_ => None,
615+
CSTNode::Production(p) => Some(p.span?.start),
616+
CSTNode::Empty => panic!(),
615617
})?;
616-
let end = args.iter().rev().find_map(|x| match x {
618+
let end = nodes.iter().rev().find_map(|x| match x {
617619
CSTNode::Terminal(t) => Some(t.span.end),
618-
CSTNode::Production(p) => get_span_of_nodes(p.args).map(|x| x.end),
619-
_ => None,
620+
CSTNode::Production(p) => Some(p.span?.end),
621+
CSTNode::Empty => panic!(),
620622
})?;
621623
Some(Span { start, end })
622624
}
623625

624-
fn extend_span<'a>(value: &mut CSTNode<'a>, span: Option<Span>, ctx: &'a Context) {
626+
fn expand_span<'a>(value: &mut CSTNode<'a>, span: Option<Span>, ctx: &'a Context) {
625627
let Some(span) = span else {
626628
return;
627629
};
628630

629-
let CSTNode::Terminal(terminal) = value else {
630-
return;
631+
let curr_span = match value {
632+
CSTNode::Empty => panic!(),
633+
CSTNode::Terminal(t) => &t.span,
634+
CSTNode::Production(p) => {
635+
if let Some(span) = &p.span {
636+
span
637+
} else {
638+
p.span = Some(span);
639+
return;
640+
}
641+
}
631642
};
632643

633-
let mut new_term = terminal.clone();
634-
635-
if span.start < new_term.span.start {
636-
new_term.span.start = span.start;
637-
}
638-
if span.end > new_term.span.end {
639-
new_term.span.end = span.end;
644+
if curr_span.start <= span.start && span.end <= curr_span.end {
645+
return;
640646
}
641-
*terminal = ctx.alloc_terminal(new_term);
647+
648+
let span = span.extend(curr_span);
649+
650+
match value {
651+
CSTNode::Empty => panic!(),
652+
CSTNode::Terminal(t) => {
653+
let mut new_term = t.clone();
654+
new_term.span = span;
655+
*t = ctx.alloc_terminal(new_term);
656+
}
657+
CSTNode::Production(p) => p.span = Some(span),
658+
};
642659
}
643660

644661
const PARSER_COUNT_MAX: usize = 10;

edb/edgeql-parser/src/position.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,13 @@ impl Span {
7979
end: right.end,
8080
}
8181
}
82+
83+
pub fn extend(self, other: &Span) -> Span {
84+
Span {
85+
start: u64::min(self.start, other.start),
86+
end: u64::max(self.end, other.end),
87+
}
88+
}
8289
}
8390

8491
fn new_lines_in_fragment(data: &[u8]) -> u64 {

edb/edgeql/ast.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -278,12 +278,14 @@ class Constant(BaseConstant):
278278
value: str
279279

280280
@classmethod
281-
def string(cls, value: str) -> Constant:
282-
return Constant(kind=ConstantKind.STRING, value=value)
281+
def string(cls, value: str, span: Span | None = None) -> Constant:
282+
return Constant(kind=ConstantKind.STRING, value=value, span=span)
283283

284284
@classmethod
285-
def boolean(cls, b: bool) -> Constant:
286-
return Constant(kind=ConstantKind.BOOLEAN, value=str(b).lower())
285+
def boolean(cls, b: bool, span: Span | None = None) -> Constant:
286+
return Constant(
287+
kind=ConstantKind.BOOLEAN, value=str(b).lower(), span=span
288+
)
287289

288290
@classmethod
289291
def integer(cls, i: int) -> Constant:

edb/edgeql/parser/__init__.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -236,8 +236,23 @@ def _cst_to_ast(
236236
production_id = node.id
237237
non_term_type, method = productions[production_id]
238238
sym = non_term_type()
239+
240+
if node.start is not None and node.end is not None:
241+
sym.span = parsing.Span(
242+
filename=filename,
243+
buffer=source.text(),
244+
start=node.start,
245+
end=node.end,
246+
)
247+
else:
248+
sym.span = None
249+
239250
method(sym, *args)
240251

252+
# copy the production's span onto the resulting AST node
253+
if sym.span is not None and isinstance(sym.val, qlast.Base):
254+
sym.val.span = sym.span
255+
241256
# push into result stack
242257
result.append(sym)
243258

edb/edgeql/parser/grammar/commondl.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -417,6 +417,8 @@ def reduce_USING_Identifier_EXPRESSION(self, _using, ident, _expression):
417417

418418

419419
class ProcessFunctionBlockMixin:
420+
span: parsing.Span
421+
420422
def _process_function_body(self, block, *, optional_using: bool=False):
421423
props: dict[str, typing.Any] = {}
422424

@@ -483,6 +485,7 @@ def _process_function_body(self, block, *, optional_using: bool=False):
483485
from_function=from_function,
484486
from_expr=from_expr,
485487
code=code,
488+
span=self.span,
486489
)
487490

488491
props['nativecode'] = nativecode

0 commit comments

Comments
 (0)