Skip to content

Commit eafe22e

Browse files
committed
Merge remote-tracking branch 'origin/main' into nickrolfe/oj
2 parents 175958b + 2a32b59 commit eafe22e

File tree

103 files changed

+12840
-10341
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

103 files changed

+12840
-10341
lines changed

autobuilder/src/main.rs

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -26,12 +26,12 @@ fn main() -> std::io::Result<()> {
2626

2727
for line in env::var("LGTM_INDEX_FILTERS")
2828
.unwrap_or_default()
29-
.split("\n")
29+
.split('\n')
3030
{
31-
if line.starts_with("include:") {
32-
cmd.arg("--include").arg(&line[8..]);
33-
} else if line.starts_with("exclude:") {
34-
cmd.arg("--exclude").arg(&line[8..]);
31+
if let Some(stripped) = line.strip_prefix("include:") {
32+
cmd.arg("--include").arg(stripped);
33+
} else if let Some(stripped) = line.strip_prefix("exclude:") {
34+
cmd.arg("--exclude").arg(stripped);
3535
}
3636
}
3737
let exit = &cmd.spawn()?.wait()?;

extractor/src/extractor.rs

Lines changed: 37 additions & 58 deletions
Original file line number | Diff line number | Diff line change
@@ -165,7 +165,7 @@ pub fn extract(
165165
schema: &NodeTypeMap,
166166
trap_writer: &mut TrapWriter,
167167
path: &Path,
168-
source: &Vec<u8>,
168+
source: &[u8],
169169
ranges: &[Range],
170170
) -> std::io::Result<()> {
171171
let span = span!(
@@ -180,17 +180,16 @@ pub fn extract(
180180

181181
let mut parser = Parser::new();
182182
parser.set_language(language).unwrap();
183-
parser.set_included_ranges(&ranges).unwrap();
183+
parser.set_included_ranges(ranges).unwrap();
184184
let tree = parser.parse(&source, None).expect("Failed to parse file");
185185
trap_writer.comment(format!("Auto-generated TRAP file for {}", path.display()));
186186
let file_label = &trap_writer.populate_file(path);
187187
let mut visitor = Visitor {
188-
source: &source,
189-
trap_writer: trap_writer,
188+
source,
189+
trap_writer,
190190
// TODO: should we handle path strings that are not valid UTF8 better?
191191
path: format!("{}", path.display()),
192192
file_label: *file_label,
193-
token_counter: 0,
194193
toplevel_child_counter: 0,
195194
stack: Vec::new(),
196195
language_prefix,
@@ -206,15 +205,7 @@ pub fn extract(
206205
/// HTML entities.
207206
fn escape_key<'a, S: Into<Cow<'a, str>>>(key: S) -> Cow<'a, str> {
208207
fn needs_escaping(c: char) -> bool {
209-
match c {
210-
'&' => true,
211-
'{' => true,
212-
'}' => true,
213-
'"' => true,
214-
'@' => true,
215-
'#' => true,
216-
_ => false,
217-
}
208+
matches!(c, '&' | '{' | '}' | '"' | '@' | '#')
218209
}
219210

220211
let key = key.into();
@@ -297,11 +288,9 @@ struct Visitor<'a> {
297288
/// source file.
298289
file_label: Label,
299290
/// The source code as a UTF-8 byte array
300-
source: &'a Vec<u8>,
291+
source: &'a [u8],
301292
/// A TrapWriter to accumulate trap entries
302293
trap_writer: &'a mut TrapWriter,
303-
/// A counter for tokens
304-
token_counter: usize,
305294
/// A counter for top-level child nodes
306295
toplevel_child_counter: usize,
307296
/// Language prefix
@@ -345,7 +334,7 @@ impl Visitor<'_> {
345334
full_error_message: String,
346335
node: Node,
347336
) {
348-
let (start_line, start_column, end_line, end_column) = location_for(&self.source, node);
337+
let (start_line, start_column, end_line, end_column) = location_for(self.source, node);
349338
let loc = self.trap_writer.location(
350339
self.file_label,
351340
start_line,
@@ -376,15 +365,15 @@ impl Visitor<'_> {
376365
let id = self.trap_writer.fresh_id();
377366

378367
self.stack.push((id, 0, Vec::new()));
379-
return true;
368+
true
380369
}
381370

382371
fn leave_node(&mut self, field_name: Option<&'static str>, node: Node) {
383372
if node.is_error() || node.is_missing() {
384373
return;
385374
}
386375
let (id, _, child_nodes) = self.stack.pop().expect("Vistor: empty stack");
387-
let (start_line, start_column, end_line, end_column) = location_for(&self.source, node);
376+
let (start_line, start_column, end_line, end_column) = location_for(self.source, node);
388377
let loc = self.trap_writer.location(
389378
self.file_label,
390379
start_line,
@@ -425,13 +414,10 @@ impl Visitor<'_> {
425414
vec![
426415
Arg::Label(id),
427416
Arg::Int(*kind_id),
428-
Arg::Label(self.file_label),
429-
Arg::Int(self.token_counter),
430417
sliced_source_arg(self.source, node),
431418
Arg::Label(loc),
432419
],
433420
);
434-
self.token_counter += 1;
435421
}
436422
EntryKind::Table {
437423
fields,
@@ -446,11 +432,10 @@ impl Visitor<'_> {
446432
Arg::Int(parent_index),
447433
],
448434
);
449-
let mut all_args = Vec::new();
450-
all_args.push(Arg::Label(id));
435+
let mut all_args = vec![Arg::Label(id)];
451436
all_args.extend(args);
452437
all_args.push(Arg::Label(loc));
453-
self.trap_writer.add_tuple(&table_name, all_args);
438+
self.trap_writer.add_tuple(table_name, all_args);
454439
}
455440
}
456441
_ => {
@@ -485,8 +470,8 @@ impl Visitor<'_> {
485470
fn complex_node(
486471
&mut self,
487472
node: &Node,
488-
fields: &Vec<Field>,
489-
child_nodes: &Vec<ChildNode>,
473+
fields: &[Field],
474+
child_nodes: &[ChildNode],
490475
parent_id: Label,
491476
) -> Option<Vec<Arg>> {
492477
let mut map: Map<&Option<String>, (&Field, Vec<Arg>)> = Map::new();
@@ -523,22 +508,20 @@ impl Visitor<'_> {
523508
);
524509
self.record_parse_error_for_node(error_message, full_error_message, *node);
525510
}
526-
} else {
527-
if child_node.field_name.is_some() || child_node.type_name.named {
528-
let error_message = format!(
529-
"value for unknown field: {}::{} and type {:?}",
530-
node.kind(),
531-
&child_node.field_name.unwrap_or("child"),
532-
&child_node.type_name
533-
);
534-
let full_error_message = format!(
535-
"{}:{}: {}",
536-
&self.path,
537-
node.start_position().row + 1,
538-
error_message
539-
);
540-
self.record_parse_error_for_node(error_message, full_error_message, *node);
541-
}
511+
} else if child_node.field_name.is_some() || child_node.type_name.named {
512+
let error_message = format!(
513+
"value for unknown field: {}::{} and type {:?}",
514+
node.kind(),
515+
&child_node.field_name.unwrap_or("child"),
516+
&child_node.type_name
517+
);
518+
let full_error_message = format!(
519+
"{}:{}: {}",
520+
&self.path,
521+
node.start_position().row + 1,
522+
error_message
523+
);
524+
self.record_parse_error_for_node(error_message, full_error_message, *node);
542525
}
543526
}
544527
let mut args = Vec::new();
@@ -586,13 +569,12 @@ impl Visitor<'_> {
586569
);
587570
break;
588571
}
589-
let mut args = Vec::new();
590-
args.push(Arg::Label(parent_id));
572+
let mut args = vec![Arg::Label(parent_id)];
591573
if *has_index {
592574
args.push(Arg::Int(index))
593575
}
594576
args.push(child_value.clone());
595-
self.trap_writer.add_tuple(&table_name, args);
577+
self.trap_writer.add_tuple(table_name, args);
596578
}
597579
}
598580
}
@@ -610,13 +592,10 @@ impl Visitor<'_> {
610592
if tp == single_type {
611593
return true;
612594
}
613-
match &self.schema.get(single_type).unwrap().kind {
614-
EntryKind::Union { members } => {
615-
if self.type_matches_set(tp, members) {
616-
return true;
617-
}
595+
if let EntryKind::Union { members } = &self.schema.get(single_type).unwrap().kind {
596+
if self.type_matches_set(tp, members) {
597+
return true;
618598
}
619-
_ => {}
620599
}
621600
}
622601
node_types::FieldTypeInfo::Multiple { types, .. } => {
@@ -646,15 +625,15 @@ impl Visitor<'_> {
646625
}
647626

648627
// Emit a slice of a source file as an Arg.
649-
fn sliced_source_arg(source: &Vec<u8>, n: Node) -> Arg {
628+
fn sliced_source_arg(source: &[u8], n: Node) -> Arg {
650629
let range = n.byte_range();
651630
Arg::String(String::from_utf8_lossy(&source[range.start..range.end]).into_owned())
652631
}
653632

654633
// Emit a pair of `TrapEntry`s for the provided node, appropriately calibrated.
655634
// The first is the location and label definition, and the second is the
656635
// 'Located' entry.
657-
fn location_for<'a>(source: &Vec<u8>, n: Node) -> (usize, usize, usize, usize) {
636+
fn location_for(source: &[u8], n: Node) -> (usize, usize, usize, usize) {
658637
// Tree-sitter row, column values are 0-based while CodeQL starts
659638
// counting at 1. In addition Tree-sitter's row and column for the
660639
// end position are exclusive while CodeQL's end positions are inclusive.
@@ -812,18 +791,18 @@ impl fmt::Display for Arg {
812791
/// the string is sliced at the provided limit. If there is a multi-byte character
813792
/// at the limit then the returned slice will be slightly shorter than the limit to
814793
/// avoid splitting that multi-byte character.
815-
fn limit_string(string: &String, max_size: usize) -> &str {
794+
fn limit_string(string: &str, max_size: usize) -> &str {
816795
if string.len() <= max_size {
817796
return string;
818797
}
819-
let p = string.as_ptr();
798+
let p = string.as_bytes();
820799
let mut index = max_size;
821800
// We want to clip the string at [max_size]; however, the character at that position
822801
// may span several bytes. We need to find the first byte of the character. In UTF-8
823802
// encoded data any byte that matches the bit pattern 10XXXXXX is not a start byte.
824803
// Therefore we decrement the index as long as there are bytes matching this pattern.
825804
// This ensures we cut the string at the border between one character and another.
826-
while index > 0 && unsafe { (*p.offset(index as isize) & 0b11000000) == 0b10000000 } {
805+
while index > 0 && (p[index] & 0b11000000) == 0b10000000 {
827806
index -= 1;
828807
}
829808
&string[0..index]

extractor/src/main.rs

Lines changed: 25 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,6 @@ mod extractor;
22

33
extern crate num_cpus;
44

5-
use clap;
65
use flate2::write::GzEncoder;
76
use rayon::prelude::*;
87
use std::fs;
@@ -57,7 +56,7 @@ impl TrapCompression {
5756
* (minimum of 1). If unspecified, should be considered as set to -1."
5857
*/
5958
fn num_codeql_threads() -> usize {
60-
let threads_str = std::env::var("CODEQL_THREADS").unwrap_or("-1".to_owned());
59+
let threads_str = std::env::var("CODEQL_THREADS").unwrap_or_else(|_| "-1".to_owned());
6160
match threads_str.parse::<i32>() {
6261
Ok(num) if num <= 0 => {
6362
let reduction = -num as usize;
@@ -124,6 +123,10 @@ fn main() -> std::io::Result<()> {
124123

125124
let language = tree_sitter_ruby::language();
126125
let erb = tree_sitter_embedded_template::language();
126+
// Look up tree-sitter kind ids now, to avoid string comparisons when scanning ERB files.
127+
let erb_directive_id = erb.id_for_node_kind("directive", true);
128+
let erb_output_directive_id = erb.id_for_node_kind("output_directive", true);
129+
let erb_code_id = erb.id_for_node_kind("code", true);
127130
let schema = node_types::read_node_types_str("ruby", tree_sitter_ruby::NODE_TYPES)?;
128131
let erb_schema =
129132
node_types::read_node_types_str("erb", tree_sitter_embedded_template::NODE_TYPES)?;
@@ -149,7 +152,13 @@ fn main() -> std::io::Result<()> {
149152
&[],
150153
)?;
151154

152-
let (ranges, line_breaks) = scan_erb(erb, &source);
155+
let (ranges, line_breaks) = scan_erb(
156+
erb,
157+
&source,
158+
erb_directive_id,
159+
erb_output_directive_id,
160+
erb_code_id,
161+
);
153162
for i in line_breaks {
154163
if i < source.len() {
155164
source[i] = b'\n';
@@ -181,12 +190,12 @@ fn main() -> std::io::Result<()> {
181190
}
182191

183192
fn write_trap(
184-
trap_dir: &PathBuf,
193+
trap_dir: &Path,
185194
path: PathBuf,
186195
trap_writer: extractor::TrapWriter,
187196
trap_compression: &TrapCompression,
188197
) -> std::io::Result<()> {
189-
let trap_file = path_for(&trap_dir, &path, trap_compression.extension());
198+
let trap_file = path_for(trap_dir, &path, trap_compression.extension());
190199
std::fs::create_dir_all(&trap_file.parent().unwrap())?;
191200
let trap_file = std::fs::File::create(&trap_file)?;
192201
let mut trap_file = BufWriter::new(trap_file);
@@ -199,18 +208,24 @@ fn write_trap(
199208
}
200209
}
201210

202-
fn scan_erb(erb: Language, source: &std::vec::Vec<u8>) -> (Vec<Range>, Vec<usize>) {
211+
fn scan_erb(
212+
erb: Language,
213+
source: &[u8],
214+
directive_id: u16,
215+
output_directive_id: u16,
216+
code_id: u16,
217+
) -> (Vec<Range>, Vec<usize>) {
203218
let mut parser = Parser::new();
204219
parser.set_language(erb).unwrap();
205220
let tree = parser.parse(&source, None).expect("Failed to parse file");
206221
let mut result = Vec::new();
207222
let mut line_breaks = vec![];
208223

209224
for n in tree.root_node().children(&mut tree.walk()) {
210-
let kind = n.kind();
211-
if kind == "directive" || kind == "output_directive" {
225+
let kind_id = n.kind_id();
226+
if kind_id == directive_id || kind_id == output_directive_id {
212227
for c in n.children(&mut tree.walk()) {
213-
if c.kind() == "code" {
228+
if c.kind_id() == code_id {
214229
let mut range = c.range();
215230
if range.end_byte < source.len() {
216231
line_breaks.push(range.end_byte);
@@ -222,7 +237,7 @@ fn scan_erb(erb: Language, source: &std::vec::Vec<u8>) -> (Vec<Range>, Vec<usize
222237
}
223238
}
224239
}
225-
if result.len() == 0 {
240+
if result.is_empty() {
226241
let root = tree.root_node();
227242
// Add an empty range at the end of the file
228243
result.push(Range {

generator/src/dbscheme.rs

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -68,10 +68,10 @@ impl<'a> fmt::Display for Table<'a> {
6868
}
6969
write!(f, "{}", key)?;
7070
}
71-
write!(f, "]\n")?;
71+
writeln!(f, "]")?;
7272
}
7373

74-
write!(f, "{}(\n", self.name)?;
74+
writeln!(f, "{}(", self.name)?;
7575
for (column_index, column) in self.columns.iter().enumerate() {
7676
write!(f, " ")?;
7777
if column.unique {
@@ -92,7 +92,7 @@ impl<'a> fmt::Display for Table<'a> {
9292
if column_index + 1 != self.columns.len() {
9393
write!(f, ",")?;
9494
}
95-
write!(f, "\n")?;
95+
writeln!(f)?;
9696
}
9797
write!(f, ");")?;
9898

0 commit comments

Comments
 (0)