Skip to content

Commit b2ee580

Browse files
authored
Merge pull request github#15496 from github/nickrolfe/loc-fresh-ids
Tree-sitter extractors: use fresh IDs for locations
2 parents 5f729d5 + 514a92d commit b2ee580

File tree

2 files changed

+105
-45
lines changed

2 files changed

+105
-45
lines changed

shared/tree-sitter-extractor/src/extractor/mod.rs

Lines changed: 83 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,16 @@ fn populate_empty_file(writer: &mut trap::Writer) -> trap::Label {
4343

4444
pub fn populate_empty_location(writer: &mut trap::Writer) {
4545
let file_label = populate_empty_file(writer);
46-
location(writer, file_label, 0, 0, 0, 0);
46+
global_location(
47+
writer,
48+
file_label,
49+
trap::Location {
50+
start_line: 0,
51+
start_column: 0,
52+
end_line: 0,
53+
end_column: 0,
54+
},
55+
);
4756
}
4857

4958
pub fn populate_parent_folders(
@@ -85,28 +94,54 @@ pub fn populate_parent_folders(
8594
}
8695
}
8796

88-
fn location(
97+
/** Get the label for the given location, defining it a global ID if it doesn't exist yet. */
98+
fn global_location(
8999
writer: &mut trap::Writer,
90100
file_label: trap::Label,
91-
start_line: usize,
92-
start_column: usize,
93-
end_line: usize,
94-
end_column: usize,
101+
location: trap::Location,
95102
) -> trap::Label {
96103
let (loc_label, fresh) = writer.global_id(&format!(
97104
"loc,{{{}}},{},{},{},{}",
98-
file_label, start_line, start_column, end_line, end_column
105+
file_label,
106+
location.start_line,
107+
location.start_column,
108+
location.end_line,
109+
location.end_column
99110
));
100111
if fresh {
101112
writer.add_tuple(
102113
"locations_default",
103114
vec![
104115
trap::Arg::Label(loc_label),
105116
trap::Arg::Label(file_label),
106-
trap::Arg::Int(start_line),
107-
trap::Arg::Int(start_column),
108-
trap::Arg::Int(end_line),
109-
trap::Arg::Int(end_column),
117+
trap::Arg::Int(location.start_line),
118+
trap::Arg::Int(location.start_column),
119+
trap::Arg::Int(location.end_line),
120+
trap::Arg::Int(location.end_column),
121+
],
122+
);
123+
}
124+
loc_label
125+
}
126+
127+
/** Get the label for the given location, creating it as a fresh ID if we haven't seen the location
128+
* yet for this file. */
129+
fn location_label(
130+
writer: &mut trap::Writer,
131+
file_label: trap::Label,
132+
location: trap::Location,
133+
) -> trap::Label {
134+
let (loc_label, fresh) = writer.location_label(location);
135+
if fresh {
136+
writer.add_tuple(
137+
"locations_default",
138+
vec![
139+
trap::Arg::Label(loc_label),
140+
trap::Arg::Label(file_label),
141+
trap::Arg::Int(location.start_line),
142+
trap::Arg::Int(location.start_column),
143+
trap::Arg::Int(location.end_line),
144+
trap::Arg::Int(location.end_column),
110145
],
111146
);
112147
}
@@ -245,26 +280,25 @@ impl<'a> Visitor<'a> {
245280
node: Node,
246281
status_page: bool,
247282
) {
248-
let (start_line, start_column, end_line, end_column) = location_for(self, node);
249-
let loc = location(
250-
self.trap_writer,
251-
self.file_label,
252-
start_line,
253-
start_column,
254-
end_line,
255-
end_column,
256-
);
283+
let loc = location_for(self, node);
284+
let loc_label = location_label(self.trap_writer, self.file_label, loc);
257285
let mut mesg = self.diagnostics_writer.new_entry(
258286
"parse-error",
259287
"Could not process some files due to syntax errors",
260288
);
261289
mesg.severity(diagnostics::Severity::Warning)
262-
.location(self.path, start_line, start_column, end_line, end_column)
290+
.location(
291+
self.path,
292+
loc.start_line,
293+
loc.start_column,
294+
loc.end_line,
295+
loc.end_column,
296+
)
263297
.message(message, args);
264298
if status_page {
265299
mesg.status_page();
266300
}
267-
self.record_parse_error(loc, &mesg);
301+
self.record_parse_error(loc_label, &mesg);
268302
}
269303

270304
fn enter_node(&mut self, node: Node) -> bool {
@@ -298,15 +332,8 @@ impl<'a> Visitor<'a> {
298332
return;
299333
}
300334
let (id, _, child_nodes) = self.stack.pop().expect("Vistor: empty stack");
301-
let (start_line, start_column, end_line, end_column) = location_for(self, node);
302-
let loc = location(
303-
self.trap_writer,
304-
self.file_label,
305-
start_line,
306-
start_column,
307-
end_line,
308-
end_column,
309-
);
335+
let loc = location_for(self, node);
336+
let loc_label = location_label(self.trap_writer, self.file_label, loc);
310337
let table = self
311338
.schema
312339
.get(&TypeName {
@@ -333,7 +360,7 @@ impl<'a> Visitor<'a> {
333360
trap::Arg::Label(id),
334361
trap::Arg::Label(parent_id),
335362
trap::Arg::Int(parent_index),
336-
trap::Arg::Label(loc),
363+
trap::Arg::Label(loc_label),
337364
],
338365
);
339366
self.trap_writer.add_tuple(
@@ -356,7 +383,7 @@ impl<'a> Visitor<'a> {
356383
trap::Arg::Label(id),
357384
trap::Arg::Label(parent_id),
358385
trap::Arg::Int(parent_index),
359-
trap::Arg::Label(loc),
386+
trap::Arg::Label(loc_label),
360387
],
361388
);
362389
let mut all_args = vec![trap::Arg::Label(id)];
@@ -366,14 +393,20 @@ impl<'a> Visitor<'a> {
366393
}
367394
_ => {
368395
self.record_parse_error(
369-
loc,
396+
loc_label,
370397
self.diagnostics_writer
371398
.new_entry(
372399
"parse-error",
373400
"Could not process some files due to syntax errors",
374401
)
375402
.severity(diagnostics::Severity::Warning)
376-
.location(self.path, start_line, start_column, end_line, end_column)
403+
.location(
404+
self.path,
405+
loc.start_line,
406+
loc.start_column,
407+
loc.end_line,
408+
loc.end_column,
409+
)
377410
.message(
378411
"Unknown table type: {}",
379412
&[diagnostics::MessageArg::Code(node.kind())],
@@ -555,7 +588,7 @@ fn sliced_source_arg(source: &[u8], n: Node) -> trap::Arg {
555588
// Emit a pair of `TrapEntry`s for the provided node, appropriately calibrated.
556589
// The first is the location and label definition, and the second is the
557590
// 'Located' entry.
558-
fn location_for(visitor: &mut Visitor, n: Node) -> (usize, usize, usize, usize) {
591+
fn location_for(visitor: &mut Visitor, n: Node) -> trap::Location {
559592
// Tree-sitter row, column values are 0-based while CodeQL starts
560593
// counting at 1. In addition Tree-sitter's row and column for the
561594
// end position are exclusive while CodeQL's end positions are inclusive.
@@ -565,16 +598,16 @@ fn location_for(visitor: &mut Visitor, n: Node) -> (usize, usize, usize, usize)
565598
// the end column is 0 (start of a line). In such cases the end position must be
566599
// set to the end of the previous line.
567600
let start_line = n.start_position().row + 1;
568-
let start_col = n.start_position().column + 1;
601+
let start_column = n.start_position().column + 1;
569602
let mut end_line = n.end_position().row + 1;
570-
let mut end_col = n.end_position().column;
571-
if start_line > end_line || start_line == end_line && start_col > end_col {
603+
let mut end_column = n.end_position().column;
604+
if start_line > end_line || start_line == end_line && start_column > end_column {
572605
// the range is empty, clip it to sensible values
573606
end_line = start_line;
574-
end_col = start_col - 1;
575-
} else if end_col == 0 {
607+
end_column = start_column - 1;
608+
} else if end_column == 0 {
576609
let source = visitor.source;
577-
// end_col = 0 means that we are at the start of a line
610+
// end_column = 0 means that we are at the start of a line
578611
// unfortunately 0 is invalid as column number, therefore
579612
// we should update the end location to be the end of the
580613
// previous line
@@ -591,10 +624,10 @@ fn location_for(visitor: &mut Visitor, n: Node) -> (usize, usize, usize, usize)
591624
);
592625
}
593626
end_line -= 1;
594-
end_col = 1;
627+
end_column = 1;
595628
while index > 0 && source[index - 1] != b'\n' {
596629
index -= 1;
597-
end_col += 1;
630+
end_column += 1;
598631
}
599632
} else {
600633
visitor.diagnostics_writer.write(
@@ -612,7 +645,12 @@ fn location_for(visitor: &mut Visitor, n: Node) -> (usize, usize, usize, usize)
612645
);
613646
}
614647
}
615-
(start_line, start_col, end_line, end_col)
648+
trap::Location {
649+
start_line,
650+
start_column,
651+
end_line,
652+
end_column,
653+
}
616654
}
617655

618656
fn traverse(tree: &Tree, visitor: &mut Visitor) {

shared/tree-sitter-extractor/src/trap.rs

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,23 @@ use std::path::Path;
55

66
use flate2::write::GzEncoder;
77

8+
#[derive(Clone, Copy, Eq, PartialEq, PartialOrd, Ord, Hash)]
9+
pub struct Location {
10+
pub start_line: usize,
11+
pub start_column: usize,
12+
pub end_line: usize,
13+
pub end_column: usize,
14+
}
15+
816
pub struct Writer {
917
/// The accumulated trap entries
1018
trap_output: Vec<Entry>,
1119
/// A counter for generating fresh labels
1220
counter: u32,
1321
/// cache of global keys
1422
global_keys: std::collections::HashMap<String, Label>,
23+
/// Labels for locations, which don't use global keys
24+
location_labels: std::collections::HashMap<Location, Label>,
1525
}
1626

1727
impl Writer {
@@ -20,6 +30,7 @@ impl Writer {
2030
counter: 0,
2131
trap_output: Vec::new(),
2232
global_keys: std::collections::HashMap::new(),
33+
location_labels: std::collections::HashMap::new(),
2334
}
2435
}
2536

@@ -50,6 +61,17 @@ impl Writer {
5061
(label, true)
5162
}
5263

64+
/// Gets the label for the given location. The first call for a given location will define it as
65+
/// a fresh (star) ID.
66+
pub fn location_label(&mut self, loc: Location) -> (Label, bool) {
67+
if let Some(label) = self.location_labels.get(&loc) {
68+
return (*label, false);
69+
}
70+
let label = self.fresh_id();
71+
self.location_labels.insert(loc, label);
72+
(label, true)
73+
}
74+
5375
pub fn add_tuple(&mut self, table_name: &str, args: Vec<Arg>) {
5476
self.trap_output
5577
.push(Entry::GenericTuple(table_name.to_owned(), args))

0 commit comments

Comments
 (0)