Skip to content

Commit f57dd0a

Browse files
authored
Merge pull request github#17552 from github/aibaars/diagnostics
Rust: extract parse errors as diagnostics
2 parents 0ae10ec + 5714811 commit f57dd0a

File tree

14 files changed

+282
-20
lines changed

14 files changed

+282
-20
lines changed

config/identical-files.json

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -355,5 +355,9 @@
355355
"Python model summaries test extension": [
356356
"python/ql/test/library-tests/dataflow/model-summaries/InlineTaintTest.ext.yml",
357357
"python/ql/test/library-tests/dataflow/model-summaries/NormalDataflowTest.ext.yml"
358+
],
359+
"Diagnostics.qll": [
360+
"ruby/ql/lib/codeql/ruby/Diagnostics.qll",
361+
"rust/ql/lib/codeql/rust/Diagnostics.qll"
358362
]
359363
}

ruby/ql/lib/codeql/ruby/Diagnostics.qll

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
/** Provides classes relating to extraction diagnostics. */
2+
13
private import codeql.Locations
24

35
/** A diagnostic emitted during extraction, such as a parse error */

rust/extractor/src/main.rs

Lines changed: 42 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,48 @@
11
use anyhow::Context;
22
use ra_ap_ide_db::line_index::LineIndex;
33
use ra_ap_parser::Edition;
4+
use std::borrow::Cow;
45
mod archive;
56
mod config;
67
pub mod generated;
78
mod translate;
89
pub mod trap;
910
use ra_ap_syntax::ast::SourceFile;
10-
use ra_ap_syntax::AstNode;
11+
use ra_ap_syntax::{AstNode, SyntaxError, TextRange, TextSize};
12+
13+
fn from_utf8_lossy(v: &[u8]) -> (Cow<'_, str>, Option<SyntaxError>) {
14+
let mut iter = v.utf8_chunks();
15+
let (first_valid, first_invalid) = if let Some(chunk) = iter.next() {
16+
let valid = chunk.valid();
17+
let invalid = chunk.invalid();
18+
if invalid.is_empty() {
19+
debug_assert_eq!(valid.len(), v.len());
20+
return (Cow::Borrowed(valid), None);
21+
}
22+
(valid, invalid)
23+
} else {
24+
return (Cow::Borrowed(""), None);
25+
};
26+
27+
const REPLACEMENT: &str = "\u{FFFD}";
28+
let error_start = first_valid.len() as u32;
29+
let error_end = error_start + first_invalid.len() as u32;
30+
let error_range = TextRange::new(TextSize::new(error_start), TextSize::new(error_end));
31+
let error = SyntaxError::new("invalid utf-8 sequence".to_owned(), error_range);
32+
let mut res = String::with_capacity(v.len());
33+
res.push_str(first_valid);
34+
35+
res.push_str(REPLACEMENT);
36+
37+
for chunk in iter {
38+
res.push_str(chunk.valid());
39+
if !chunk.invalid().is_empty() {
40+
res.push_str(REPLACEMENT);
41+
}
42+
}
43+
44+
(Cow::Owned(res), Some(error))
45+
}
1146

1247
fn extract(
1348
archiver: &archive::Archiver,
@@ -18,24 +53,25 @@ fn extract(
1853
let file = std::fs::canonicalize(&file).unwrap_or(file);
1954
archiver.archive(&file);
2055
let input = std::fs::read(&file)?;
21-
let input = String::from_utf8(input)?;
56+
let (input, err) = from_utf8_lossy(&input);
2257
let line_index = LineIndex::new(&input);
2358
let display_path = file.to_string_lossy();
2459
let mut trap = traps.create("source", &file);
2560
let label = trap.emit_file(&file);
2661
let mut translator = translate::Translator::new(trap, label, line_index);
27-
62+
if let Some(err) = err {
63+
translator.emit_parse_error(display_path.as_ref(), err);
64+
}
2865
let parse = ra_ap_syntax::ast::SourceFile::parse(&input, Edition::CURRENT);
2966
for err in parse.errors() {
30-
let (start, _) = translator.location(err.range());
31-
log::warn!("{}:{}:{}: {}", display_path, start.line, start.col, err);
67+
translator.emit_parse_error(display_path.as_ref(), err);
3268
}
3369
if let Some(ast) = SourceFile::cast(parse.syntax_node()) {
3470
translator.emit_source_file(ast);
35-
translator.trap.commit()?
3671
} else {
3772
log::warn!("Skipped {}", display_path);
3873
}
74+
translator.trap.commit()?;
3975
Ok(())
4076
}
4177
fn main() -> anyhow::Result<()> {

rust/extractor/src/translate/base.rs

Lines changed: 30 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,9 @@
1-
use crate::trap::TrapFile;
1+
use crate::trap::{DiagnosticSeverity, TrapFile};
22
use crate::trap::{Label, TrapClass};
33
use codeql_extractor::trap::{self};
44
use ra_ap_ide_db::line_index::{LineCol, LineIndex};
55
use ra_ap_syntax::ast::RangeItem;
6-
use ra_ap_syntax::TextSize;
7-
use ra_ap_syntax::{ast, TextRange};
6+
use ra_ap_syntax::{ast, SyntaxError, TextRange};
87
pub trait TextValue {
98
fn try_get_text(&self) -> Option<String>;
109
}
@@ -71,16 +70,38 @@ impl Translator {
7170
}
7271
pub fn location(&self, range: TextRange) -> (LineCol, LineCol) {
7372
let start = self.line_index.line_col(range.start());
74-
let end = self.line_index.line_col(
75-
range
76-
.end()
77-
.checked_sub(TextSize::new(1))
78-
.unwrap_or(range.end()),
79-
);
73+
let range_end = range.end();
74+
// QL end positions are inclusive, while TextRange offsets are exclusive and point at the position
75+
// right after the last character of the range. We need to shift the end offset one character to the left to
76+
// get the right inclusive QL position. Unfortunately, simply subtracting `1` from the end-offset may cause
77+
// the offset to point in the middle of a mult-byte character, resulting in a `panic`. Therefore we use `try_line_col`
78+
// with decreasing offsets to find the start of the last character included in the range.
79+
for i in 1..4 {
80+
if let Some(end) = range_end
81+
.checked_sub(i.into())
82+
.and_then(|x| self.line_index.try_line_col(x))
83+
{
84+
return (start, end);
85+
}
86+
}
87+
let end = self.line_index.line_col(range_end);
8088
(start, end)
8189
}
8290
pub fn emit_location<T: TrapClass>(&mut self, label: Label<T>, node: impl ast::AstNode) {
8391
let (start, end) = self.location(node.syntax().text_range());
8492
self.trap.emit_location(self.label, label, start, end)
8593
}
94+
pub fn emit_parse_error(&mut self, path: &str, err: SyntaxError) {
95+
let (start, end) = self.location(err.range());
96+
log::warn!("{}:{}:{}: {}", path, start.line + 1, start.col + 1, err);
97+
let message = err.to_string();
98+
let location = self.trap.emit_location_label(self.label, start, end);
99+
self.trap.emit_diagnostic(
100+
DiagnosticSeverity::Warning,
101+
"parse_error".to_owned(),
102+
message.clone(),
103+
message,
104+
location,
105+
);
106+
}
86107
}

rust/extractor/src/trap.rs

Lines changed: 41 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -128,19 +128,25 @@ pub struct TrapFile {
128128
compression: Compression,
129129
}
130130

131+
#[derive(Copy, Clone)]
132+
pub enum DiagnosticSeverity {
133+
Debug = 10,
134+
Info = 20,
135+
Warning = 30,
136+
Error = 40,
137+
}
131138
impl TrapFile {
132-
pub fn emit_location<E: TrapClass>(
139+
pub fn emit_location_label(
133140
&mut self,
134141
file_label: UntypedLabel,
135-
entity_label: Label<E>,
136142
start: LineCol,
137143
end: LineCol,
138-
) {
144+
) -> UntypedLabel {
139145
let start_line = 1 + start.line as usize;
140146
let start_column = 1 + start.col as usize;
141147
let end_line = 1 + end.line as usize;
142148
let end_column = 1 + end.col as usize;
143-
let location_label = extractor::location_label(
149+
extractor::location_label(
144150
&mut self.writer,
145151
trap::Location {
146152
file_label,
@@ -149,13 +155,43 @@ impl TrapFile {
149155
end_line,
150156
end_column,
151157
},
152-
);
158+
)
159+
}
160+
pub fn emit_location<E: TrapClass>(
161+
&mut self,
162+
file_label: UntypedLabel,
163+
entity_label: Label<E>,
164+
start: LineCol,
165+
end: LineCol,
166+
) {
167+
let location_label = self.emit_location_label(file_label, start, end);
153168
self.writer.add_tuple(
154169
"locatable_locations",
155170
vec![entity_label.into(), location_label.into()],
156171
);
157172
}
158173

174+
pub fn emit_diagnostic(
175+
&mut self,
176+
severity: DiagnosticSeverity,
177+
error_tag: String,
178+
error_message: String,
179+
full_error_message: String,
180+
location: UntypedLabel,
181+
) {
182+
let label = self.writer.fresh_id();
183+
self.writer.add_tuple(
184+
"diagnostics",
185+
vec![
186+
trap::Arg::Label(label),
187+
trap::Arg::Int(severity as usize),
188+
trap::Arg::String(error_tag),
189+
trap::Arg::String(error_message),
190+
trap::Arg::String(full_error_message),
191+
trap::Arg::Label(location),
192+
],
193+
);
194+
}
159195
pub fn emit_file(&mut self, absolute_path: &Path) -> trap::Label {
160196
extractor::populate_file(&mut self.writer, absolute_path)
161197
}
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
/** Provides classes relating to extraction diagnostics. */
2+
3+
private import codeql.Locations
4+
5+
/** A diagnostic emitted during extraction, such as a parse error */
6+
class Diagnostic extends @diagnostic {
7+
int severity;
8+
string tag;
9+
string message;
10+
string fullMessage;
11+
Location location;
12+
13+
Diagnostic() { diagnostics(this, severity, tag, message, fullMessage, location) }
14+
15+
/**
16+
* Gets the numerical severity level associated with this diagnostic.
17+
*/
18+
int getSeverity() { result = severity }
19+
20+
/** Gets a string representation of the severity of this diagnostic. */
21+
string getSeverityText() {
22+
severity = 10 and result = "Debug"
23+
or
24+
severity = 20 and result = "Info"
25+
or
26+
severity = 30 and result = "Warning"
27+
or
28+
severity = 40 and result = "Error"
29+
}
30+
31+
/** Gets the error code associated with this diagnostic, e.g. parse_error. */
32+
string getTag() { result = tag }
33+
34+
/**
35+
* Gets the error message text associated with this diagnostic.
36+
*/
37+
string getMessage() { result = message }
38+
39+
/**
40+
* Gets the full error message text associated with this diagnostic.
41+
*/
42+
string getFullMessage() { result = fullMessage }
43+
44+
/** Gets the source location of this diagnostic. */
45+
Location getLocation() { result = location }
46+
47+
/** Gets a textual representation of this diagnostic. */
48+
string toString() { result = this.getMessage() }
49+
}
50+
51+
/** A diagnostic relating to a particular error in extracting a file. */
52+
class ExtractionError extends Diagnostic {
53+
ExtractionError() { this.getTag() = "parse_error" }
54+
}
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
/**
2+
* @name Extraction errors
3+
* @description List all extraction errors for files in the source code directory.
4+
* @kind diagnostic
5+
* @id rust/diagnostics/extraction-errors
6+
*/
7+
8+
import codeql.rust.Diagnostics
9+
import codeql.files.FileSystem
10+
11+
/** Gets the SARIF severity to associate an error. */
12+
int getSeverity() { result = 2 }
13+
14+
from ExtractionError error, File f
15+
where
16+
f = error.getLocation().getFile() and
17+
exists(f.getRelativePath())
18+
select error, "Extraction failed in " + f + " with error " + error.getMessage(), getSeverity()
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
/**
2+
* @id rust/summary/number-of-files-extracted-with-errors
3+
* @name Total number of Rust files that were extracted with errors
4+
* @description The total number of Rust files in the source code directory that
5+
* were extracted, but where at least one extraction error occurred in the process.
6+
* @kind metric
7+
* @tags summary
8+
*/
9+
10+
import codeql.files.FileSystem
11+
import codeql.rust.Diagnostics
12+
13+
select count(File f |
14+
exists(ExtractionError e | e.getLocation().getFile() = f) and exists(f.getRelativePath())
15+
)
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
/**
2+
* @id rust/summary/number-of-successfully-extracted-files
3+
* @name Total number of Rust files that were extracted without error
4+
* @description The total number of Rust files in the source code directory that
5+
* were extracted without encountering any extraction errors.
6+
* @kind metric
7+
* @tags summary
8+
*/
9+
10+
import codeql.rust.Diagnostics
11+
import codeql.files.FileSystem
12+
13+
select count(File f |
14+
not exists(ExtractionError e | e.getLocation().getFile() = f) and exists(f.getRelativePath())
15+
)
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
| lib.rs:1:1:3:22 | SourceFile |
2+
| lib.rs:2:1:2:8 | Module |
3+
| lib.rs:2:5:2:7 | Name |
4+
| lib.rs:3:1:3:8 | Module |
5+
| lib.rs:3:5:3:8 | Name |
6+
| lib.rs:3:10:3:20 | NameRef |
7+
| lib.rs:3:10:3:20 | Path |
8+
| lib.rs:3:10:3:20 | PathSegment |
9+
| lib.rs:3:10:3:21 | MacroCall |
10+
| utf8-identifiers.rs:1:1:4:6 | foo |
11+
| utf8-identifiers.rs:1:1:12:2 | SourceFile |
12+
| utf8-identifiers.rs:1:4:1:6 | Name |
13+
| utf8-identifiers.rs:1:7:4:1 | GenericParamList |
14+
| utf8-identifiers.rs:2:5:2:6 | Lifetime |
15+
| utf8-identifiers.rs:2:5:2:6 | LifetimeParam |
16+
| utf8-identifiers.rs:3:5:3:5 | Name |
17+
| utf8-identifiers.rs:3:5:3:5 | TypeParam |
18+
| utf8-identifiers.rs:4:2:4:3 | ParamList |
19+
| utf8-identifiers.rs:4:5:4:6 | BlockExpr |
20+
| utf8-identifiers.rs:4:5:4:6 | StmtList |
21+
| utf8-identifiers.rs:6:1:8:1 | Struct |
22+
| utf8-identifiers.rs:6:8:6:8 | Name |
23+
| utf8-identifiers.rs:6:10:8:1 | RecordFieldList |
24+
| utf8-identifiers.rs:7:5:7:5 | Name |
25+
| utf8-identifiers.rs:7:5:7:13 | RecordField |
26+
| utf8-identifiers.rs:7:9:7:13 | NameRef |
27+
| utf8-identifiers.rs:7:9:7:13 | Path |
28+
| utf8-identifiers.rs:7:9:7:13 | PathSegment |
29+
| utf8-identifiers.rs:7:9:7:13 | PathType |
30+
| utf8-identifiers.rs:10:1:10:3 | Visibility |
31+
| utf8-identifiers.rs:10:1:12:1 | main |
32+
| utf8-identifiers.rs:10:8:10:11 | Name |
33+
| utf8-identifiers.rs:10:12:10:13 | ParamList |
34+
| utf8-identifiers.rs:10:15:12:1 | BlockExpr |
35+
| utf8-identifiers.rs:10:15:12:1 | StmtList |
36+
| utf8-identifiers.rs:11:5:11:24 | LetStmt |
37+
| utf8-identifiers.rs:11:9:11:9 | IdentPat |
38+
| utf8-identifiers.rs:11:9:11:9 | Name |
39+
| utf8-identifiers.rs:11:14:11:23 | LiteralExpr |

0 commit comments

Comments
 (0)