Skip to content

Commit b20cc70

Browse files
committed
Implement text_direction_codepoint lint
Detects Unicode BiDi control codepoints in Cargo.toml files to mitigate Trojan Source attacks (CVE-2021-42574). Features: - Detects 9 Unicode BiDi control codepoints - Groups multiple codepoints on same line into single error (like rustc) - Moves workspace check into lint function for cleaner architecture - Uses rustc-style diagnostics with codepoint info in label annotation - Includes explanatory note about BiDi codepoint risks - Deny-by-default lint level
1 parent 95f7468 commit b20cc70

File tree

4 files changed

+264
-6
lines changed

4 files changed

+264
-6
lines changed

src/cargo/core/workspace.rs

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ use crate::lints::analyze_cargo_lints_table;
2525
use crate::lints::rules::blanket_hint_mostly_unused;
2626
use crate::lints::rules::check_im_a_teapot;
2727
use crate::lints::rules::implicit_minimum_version_req;
28+
use crate::lints::rules::text_direction_codepoint;
2829
use crate::ops;
2930
use crate::sources::{CRATES_IO_INDEX, CRATES_IO_REGISTRY, PathSource, SourceConfigMap};
3031
use crate::util::context::{FeatureUnification, Value};
@@ -1313,6 +1314,13 @@ impl<'gctx> Workspace<'gctx> {
13131314
&mut run_error_count,
13141315
self.gctx,
13151316
)?;
1317+
text_direction_codepoint(
1318+
pkg.into(),
1319+
&path,
1320+
&cargo_lints,
1321+
&mut run_error_count,
1322+
self.gctx,
1323+
)?;
13161324

13171325
if run_error_count > 0 {
13181326
let plural = if run_error_count == 1 { "" } else { "s" };
@@ -1370,6 +1378,13 @@ impl<'gctx> Workspace<'gctx> {
13701378
&mut run_error_count,
13711379
self.gctx,
13721380
)?;
1381+
text_direction_codepoint(
1382+
self.root_maybe().into(),
1383+
self.root_manifest(),
1384+
&cargo_lints,
1385+
&mut run_error_count,
1386+
self.gctx,
1387+
)?;
13731388
}
13741389

13751390
// This is a short term hack to allow `blanket_hint_mostly_unused`

src/cargo/lints/rules/mod.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,19 @@
11
mod blanket_hint_mostly_unused;
22
mod im_a_teapot;
33
mod implicit_minimum_version_req;
4+
mod text_direction_codepoint;
45
mod unknown_lints;
56

67
pub use blanket_hint_mostly_unused::blanket_hint_mostly_unused;
78
pub use im_a_teapot::check_im_a_teapot;
89
pub use implicit_minimum_version_req::implicit_minimum_version_req;
10+
pub use text_direction_codepoint::text_direction_codepoint;
911
pub use unknown_lints::output_unknown_lints;
1012

1113
pub const LINTS: &[crate::lints::Lint] = &[
1214
blanket_hint_mostly_unused::LINT,
1315
implicit_minimum_version_req::LINT,
1416
im_a_teapot::LINT,
17+
text_direction_codepoint::LINT,
1518
unknown_lints::LINT,
1619
];
Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
use std::path::Path;
2+
3+
use annotate_snippets::AnnotationKind;
4+
use annotate_snippets::Group;
5+
use annotate_snippets::Level;
6+
use annotate_snippets::Snippet;
7+
use cargo_util_schemas::manifest::TomlToolLints;
8+
9+
use crate::CargoResult;
10+
use crate::GlobalContext;
11+
use crate::core::MaybePackage;
12+
use crate::lints::Lint;
13+
use crate::lints::LintLevel;
14+
use crate::lints::ManifestFor;
15+
use crate::lints::rel_cwd_manifest_path;
16+
17+
/// Unicode BiDi (bidirectional) control codepoints that can be used in
18+
/// "Trojan Source" attacks (CVE-2021-42574).
19+
///
20+
/// These codepoints change the visual representation of text on screen
21+
/// in a way that does not correspond to their in-memory representation.
22+
const UNICODE_BIDI_CODEPOINTS: &[(char, &str)] = &[
23+
('\u{202A}', "LEFT-TO-RIGHT EMBEDDING"),
24+
('\u{202B}', "RIGHT-TO-LEFT EMBEDDING"),
25+
('\u{202C}', "POP DIRECTIONAL FORMATTING"),
26+
('\u{202D}', "LEFT-TO-RIGHT OVERRIDE"),
27+
('\u{202E}', "RIGHT-TO-LEFT OVERRIDE"),
28+
('\u{2066}', "LEFT-TO-RIGHT ISOLATE"),
29+
('\u{2067}', "RIGHT-TO-LEFT ISOLATE"),
30+
('\u{2068}', "FIRST STRONG ISOLATE"),
31+
('\u{2069}', "POP DIRECTIONAL ISOLATE"),
32+
];
33+
34+
pub const LINT: Lint = Lint {
35+
name: "text_direction_codepoint",
36+
desc: "unicode codepoint changing visible direction of text present in manifest",
37+
groups: &[],
38+
default_level: LintLevel::Deny,
39+
edition_lint_opts: None,
40+
feature_gate: None,
41+
docs: None,
42+
};
43+
44+
pub fn text_direction_codepoint(
45+
manifest: ManifestFor<'_>,
46+
manifest_path: &Path,
47+
cargo_lints: &TomlToolLints,
48+
error_count: &mut usize,
49+
gctx: &GlobalContext,
50+
) -> CargoResult<()> {
51+
let (lint_level, reason) = manifest.lint_level(cargo_lints, LINT);
52+
53+
if lint_level == LintLevel::Allow {
54+
return Ok(());
55+
}
56+
57+
// Skip non-virtual workspace manifests - they are already checked as Package via emit_pkg_lints
58+
if matches!(&manifest, ManifestFor::Workspace(MaybePackage::Package(_))) {
59+
return Ok(());
60+
}
61+
62+
let contents = manifest.contents();
63+
let manifest_path_str = rel_cwd_manifest_path(manifest_path, gctx);
64+
65+
let findings: Vec<_> = contents
66+
.char_indices()
67+
.filter_map(|(idx, ch)| {
68+
UNICODE_BIDI_CODEPOINTS
69+
.iter()
70+
.find(|(c, _)| *c == ch)
71+
.map(|(_, name)| (idx, ch, *name))
72+
})
73+
.collect();
74+
75+
if findings.is_empty() {
76+
return Ok(());
77+
}
78+
79+
// Build line number map
80+
let mut line_map = Vec::new();
81+
line_map.push(0);
82+
for (idx, ch) in contents.char_indices() {
83+
if ch == '\n' {
84+
line_map.push(idx + 1);
85+
}
86+
}
87+
88+
let mut findings_by_line: std::collections::BTreeMap<usize, Vec<_>> =
89+
std::collections::BTreeMap::new();
90+
for (byte_idx, ch, name) in findings {
91+
let line_num = line_map
92+
.iter()
93+
.rposition(|&line_start| line_start <= byte_idx)
94+
.map(|i| i + 1)
95+
.unwrap_or(1);
96+
findings_by_line
97+
.entry(line_num)
98+
.or_insert_with(Vec::new)
99+
.push((byte_idx, ch, name));
100+
}
101+
102+
let level = lint_level.to_diagnostic_level();
103+
let emitted_source = LINT.emitted_source(lint_level, reason);
104+
105+
for (line_idx, line_findings) in findings_by_line.values().enumerate() {
106+
if lint_level.is_error() {
107+
*error_count += 1;
108+
}
109+
110+
let title = LINT.desc.to_string();
111+
112+
// Build snippet with multiple annotations
113+
let labels: Vec<String> = line_findings
114+
.iter()
115+
.map(|(_, ch, name)| format!(r#"`\u{{{:04X}}}` ({})"#, *ch as u32, name))
116+
.collect();
117+
118+
let mut snippet = Snippet::source(contents).path(&manifest_path_str);
119+
for ((byte_idx, ch, _), label) in line_findings.iter().zip(labels.iter()) {
120+
let span = *byte_idx..(*byte_idx + ch.len_utf8());
121+
snippet = snippet.annotation(AnnotationKind::Primary.span(span).label(label));
122+
}
123+
124+
let mut group = Group::with_title(level.clone().primary_title(&title)).element(snippet);
125+
126+
if line_idx == 0 {
127+
group = group.element(Level::NOTE.message(&emitted_source));
128+
group = group.element(Level::NOTE.message(
129+
"these kinds of unicode codepoints change the way text flows in applications/editors that support them, but can cause confusion because they change the order of characters on the screen",
130+
));
131+
}
132+
group = group.element(
133+
Level::HELP.message("if their presence wasn't intentional, you can remove them"),
134+
);
135+
136+
gctx.shell().print_report(&[group], lint_level.force())?;
137+
}
138+
139+
Ok(())
140+
}

tests/testsuite/lints/text_direction_codepoint.rs

Lines changed: 106 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,18 @@ description = \"A \u{202E}test package\"
1919

2020
p.cargo("check -Zcargo-lints")
2121
.masquerade_as_nightly_cargo(&["cargo-lints"])
22+
.with_status(101)
2223
.with_stderr_data(str![[r#"
23-
[CHECKING] foo v0.0.1 ([ROOT]/foo)
24-
[FINISHED] `dev` profile [unoptimized + debuginfo] target(s) in [ELAPSED]s
24+
[ERROR] unicode codepoint changing visible direction of text present in manifest
25+
--> Cargo.toml:6:18
26+
|
27+
6 | description = "A �test package"
28+
| ^ `/u{202E}` (RIGHT-TO-LEFT OVERRIDE)
29+
|
30+
= [NOTE] `cargo::text_direction_codepoint` is set to `deny` by default
31+
= [NOTE] these kinds of unicode codepoints change the way text flows in applications/editors that support them, but can cause confusion because they change the order of characters on the screen
32+
= [HELP] if their presence wasn't intentional, you can remove them
33+
[ERROR] encountered 1 error while running lints
2534
2635
"#]])
2736
.run();
@@ -36,6 +45,73 @@ name = \"foo\"
3645
version = \"0.0.1\"
3746
edition = \"2015\"
3847
description = \"A \u{202E}test\u{202D} package\"
48+
";
49+
let p = project()
50+
.file("Cargo.toml", manifest)
51+
.file("src/lib.rs", "")
52+
.build();
53+
54+
p.cargo("check -Zcargo-lints")
55+
.masquerade_as_nightly_cargo(&["cargo-lints"])
56+
.with_status(101)
57+
.with_stderr_data(str![[r#"
58+
[ERROR] unicode codepoint changing visible direction of text present in manifest
59+
--> Cargo.toml:6:18
60+
|
61+
6 | description = "A �test� package"
62+
| ^ ^ `/u{202D}` (LEFT-TO-RIGHT OVERRIDE)
63+
| |
64+
| `/u{202E}` (RIGHT-TO-LEFT OVERRIDE)
65+
|
66+
= [NOTE] `cargo::text_direction_codepoint` is set to `deny` by default
67+
= [NOTE] these kinds of unicode codepoints change the way text flows in applications/editors that support them, but can cause confusion because they change the order of characters on the screen
68+
= [HELP] if their presence wasn't intentional, you can remove them
69+
[ERROR] encountered 1 error while running lints
70+
71+
"#]])
72+
.run();
73+
}
74+
75+
#[cargo_test]
76+
fn allow_lint() {
77+
// Test that the lint can be allowed
78+
let manifest = "
79+
[package]
80+
name = \"foo\"
81+
version = \"0.0.1\"
82+
edition = \"2015\"
83+
description = \"A \u{202E}test package\"
84+
85+
[lints.cargo]
86+
text_direction_codepoint = \"allow\"
87+
";
88+
let p = project()
89+
.file("Cargo.toml", manifest)
90+
.file("src/lib.rs", "")
91+
.build();
92+
93+
p.cargo("check -Zcargo-lints")
94+
.masquerade_as_nightly_cargo(&["cargo-lints"])
95+
.with_stderr_data(str![[r#"
96+
[CHECKING] foo v0.0.1 ([ROOT]/foo)
97+
[FINISHED] `dev` profile [unoptimized + debuginfo] target(s) in [ELAPSED]s
98+
99+
"#]])
100+
.run();
101+
}
102+
103+
#[cargo_test]
104+
fn warn_lint() {
105+
// Test that the lint can be set to warn
106+
let manifest = "
107+
[package]
108+
name = \"foo\"
109+
version = \"0.0.1\"
110+
edition = \"2015\"
111+
description = \"A \u{202E}test package\"
112+
113+
[lints.cargo]
114+
text_direction_codepoint = \"warn\"
39115
";
40116
let p = project()
41117
.file("Cargo.toml", manifest)
@@ -45,6 +121,15 @@ description = \"A \u{202E}test\u{202D} package\"
45121
p.cargo("check -Zcargo-lints")
46122
.masquerade_as_nightly_cargo(&["cargo-lints"])
47123
.with_stderr_data(str![[r#"
124+
[WARNING] unicode codepoint changing visible direction of text present in manifest
125+
--> Cargo.toml:6:18
126+
|
127+
6 | description = "A �test package"
128+
| ^ `/u{202E}` (RIGHT-TO-LEFT OVERRIDE)
129+
|
130+
= [NOTE] `cargo::text_direction_codepoint` is set to `warn` in `[lints]`
131+
= [NOTE] these kinds of unicode codepoints change the way text flows in applications/editors that support them, but can cause confusion because they change the order of characters on the screen
132+
= [HELP] if their presence wasn't intentional, you can remove them
48133
[CHECKING] foo v0.0.1 ([ROOT]/foo)
49134
[FINISHED] `dev` profile [unoptimized + debuginfo] target(s) in [ELAPSED]s
50135
@@ -79,18 +164,24 @@ edition = "2015"
79164
}
80165

81166
#[cargo_test]
82-
fn workspace_member_bidi() {
83-
// Workspace with BiDi in member package
167+
fn workspace_inherited_allow() {
168+
// Workspace-level lint configuration with member package
84169
let manifest = "
85170
[workspace]
86171
members = [\"foo\"]
172+
173+
[workspace.lints.cargo]
174+
text_direction_codepoint = \"allow\"
87175
";
88176
let foo_manifest = "
89177
[package]
90178
name = \"foo\"
91179
version = \"0.0.1\"
92180
edition = \"2015\"
93181
description = \"A \u{202E}test package\"
182+
183+
[lints]
184+
workspace = true
94185
";
95186
let p = project()
96187
.file("Cargo.toml", manifest)
@@ -132,9 +223,18 @@ edition = "2015"
132223

133224
p.cargo("check -Zcargo-lints")
134225
.masquerade_as_nightly_cargo(&["cargo-lints"])
226+
.with_status(101)
135227
.with_stderr_data(str![[r#"
136-
[CHECKING] foo v0.0.1 ([ROOT]/foo/foo)
137-
[FINISHED] `dev` profile [unoptimized + debuginfo] target(s) in [ELAPSED]s
228+
[ERROR] unicode codepoint changing visible direction of text present in manifest
229+
--> Cargo.toml:6:14
230+
|
231+
6 | info = "test �info"
232+
| ^ `/u{202E}` (RIGHT-TO-LEFT OVERRIDE)
233+
|
234+
= [NOTE] `cargo::text_direction_codepoint` is set to `deny` by default
235+
= [NOTE] these kinds of unicode codepoints change the way text flows in applications/editors that support them, but can cause confusion because they change the order of characters on the screen
236+
= [HELP] if their presence wasn't intentional, you can remove them
237+
[ERROR] encountered 1 error while running lints
138238
139239
"#]])
140240
.run();

0 commit comments

Comments
 (0)