Skip to content

Commit b8b0451

Browse files
feat(tui): switch to tree-sitter-highlight bash highlighting (#4666)
Use tree-sitter-highlight instead of custom logic over the tree-sitter parse tree to highlight bash.
1 parent 0e5d72c commit b8b0451

File tree

4 files changed

+215
-107
lines changed

4 files changed

+215
-107
lines changed

codex-rs/Cargo.lock

Lines changed: 16 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

codex-rs/Cargo.toml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -175,8 +175,9 @@ tracing = "0.1.41"
175175
tracing-appender = "0.2.3"
176176
tracing-subscriber = "0.3.20"
177177
tracing-test = "0.2.5"
178-
tree-sitter = "0.25.9"
179-
tree-sitter-bash = "0.25.0"
178+
tree-sitter = "0.25.10"
179+
tree-sitter-bash = "0.25"
180+
tree-sitter-highlight = "0.25.10"
180181
ts-rs = "11"
181182
unicode-segmentation = "1.12.0"
182183
unicode-width = "0.2"

codex-rs/tui/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,8 @@ strum_macros = { workspace = true }
6868
supports-color = { workspace = true }
6969
tempfile = { workspace = true }
7070
textwrap = { workspace = true }
71+
tree-sitter-highlight = { workspace = true }
72+
tree-sitter-bash = { workspace = true }
7173
tokio = { workspace = true, features = [
7274
"io-std",
7375
"macros",
Lines changed: 194 additions & 103 deletions
Original file line numberDiff line numberDiff line change
@@ -1,81 +1,146 @@
1-
use codex_core::bash::try_parse_bash;
1+
use ratatui::style::Style;
22
use ratatui::style::Stylize;
33
use ratatui::text::Line;
44
use ratatui::text::Span;
5+
use std::sync::OnceLock;
6+
use tree_sitter_highlight::Highlight;
7+
use tree_sitter_highlight::HighlightConfiguration;
8+
use tree_sitter_highlight::HighlightEvent;
9+
use tree_sitter_highlight::Highlighter;
510

6-
/// Convert the full bash script into per-line styled content by first
7-
/// computing operator-dimmed spans across the entire script, then splitting
8-
/// by newlines and dimming heredoc body lines. Performs a single parse and
9-
/// reuses it for both highlighting and heredoc detection.
10-
pub(crate) fn highlight_bash_to_lines(script: &str) -> Vec<Line<'static>> {
11-
// Parse once; use the tree for both highlighting and heredoc body detection.
12-
let spans: Vec<Span<'static>> = if let Some(tree) = try_parse_bash(script) {
13-
// Single walk: collect operator ranges and heredoc rows.
14-
let root = tree.root_node();
15-
let mut cursor = root.walk();
16-
let mut stack = vec![root];
17-
let mut ranges: Vec<(usize, usize)> = Vec::new();
18-
while let Some(node) = stack.pop() {
19-
if !node.is_named() && !node.is_extra() {
20-
let kind = node.kind();
21-
let is_quote = matches!(kind, "\"" | "'" | "`");
22-
let is_whitespace = kind.trim().is_empty();
23-
if !is_quote && !is_whitespace {
24-
ranges.push((node.start_byte(), node.end_byte()));
25-
}
26-
} else if node.kind() == "heredoc_body" {
27-
ranges.push((node.start_byte(), node.end_byte()));
28-
}
29-
for child in node.children(&mut cursor) {
30-
stack.push(child);
31-
}
11+
// Ref: https://github.com/tree-sitter/tree-sitter-bash/blob/master/queries/highlights.scm
12+
#[derive(Copy, Clone)]
13+
enum BashHighlight {
14+
Comment,
15+
Constant,
16+
Embedded,
17+
Function,
18+
Keyword,
19+
Number,
20+
Operator,
21+
Property,
22+
String,
23+
}
24+
25+
impl BashHighlight {
26+
const ALL: [Self; 9] = [
27+
Self::Comment,
28+
Self::Constant,
29+
Self::Embedded,
30+
Self::Function,
31+
Self::Keyword,
32+
Self::Number,
33+
Self::Operator,
34+
Self::Property,
35+
Self::String,
36+
];
37+
38+
const fn as_str(self) -> &'static str {
39+
match self {
40+
Self::Comment => "comment",
41+
Self::Constant => "constant",
42+
Self::Embedded => "embedded",
43+
Self::Function => "function",
44+
Self::Keyword => "keyword",
45+
Self::Number => "number",
46+
Self::Operator => "operator",
47+
Self::Property => "property",
48+
Self::String => "string",
3249
}
33-
if ranges.is_empty() {
34-
ranges.push((script.len(), script.len()));
50+
}
51+
52+
fn style(self) -> Style {
53+
match self {
54+
Self::Comment | Self::Operator | Self::String => Style::default().dim(),
55+
_ => Style::default(),
3556
}
36-
ranges.sort_by_key(|(st, _)| *st);
37-
let mut spans: Vec<Span<'static>> = Vec::new();
38-
let mut i = 0usize;
39-
for (start, end) in ranges.into_iter() {
40-
let dim_start = start.max(i);
41-
let dim_end = end;
42-
if dim_start < dim_end {
43-
if dim_start > i {
44-
spans.push(script[i..dim_start].to_string().into());
45-
}
46-
spans.push(script[dim_start..dim_end].to_string().dim());
47-
i = dim_end;
48-
}
57+
}
58+
}
59+
60+
static HIGHLIGHT_CONFIG: OnceLock<HighlightConfiguration> = OnceLock::new();
61+
62+
fn highlight_names() -> &'static [&'static str] {
63+
static NAMES: OnceLock<[&'static str; BashHighlight::ALL.len()]> = OnceLock::new();
64+
NAMES
65+
.get_or_init(|| BashHighlight::ALL.map(BashHighlight::as_str))
66+
.as_slice()
67+
}
68+
69+
fn highlight_config() -> &'static HighlightConfiguration {
70+
HIGHLIGHT_CONFIG.get_or_init(|| {
71+
let language = tree_sitter_bash::LANGUAGE.into();
72+
#[expect(clippy::expect_used)]
73+
let mut config = HighlightConfiguration::new(
74+
language,
75+
"bash",
76+
tree_sitter_bash::HIGHLIGHT_QUERY,
77+
"",
78+
"",
79+
)
80+
.expect("load bash highlight query");
81+
config.configure(highlight_names());
82+
config
83+
})
84+
}
85+
86+
fn highlight_for(highlight: Highlight) -> BashHighlight {
87+
BashHighlight::ALL[highlight.0]
88+
}
89+
90+
fn push_segment(lines: &mut Vec<Line<'static>>, segment: &str, style: Option<Style>) {
91+
for (i, part) in segment.split('\n').enumerate() {
92+
if i > 0 {
93+
lines.push(Line::from(""));
4994
}
50-
if i < script.len() {
51-
spans.push(script[i..].to_string().into());
95+
if part.is_empty() {
96+
continue;
5297
}
53-
spans
54-
} else {
55-
vec![script.to_string().into()]
56-
};
57-
// Split spans into lines preserving style boundaries and highlights across newlines.
98+
let span = match style {
99+
Some(style) => Span::styled(part.to_string(), style),
100+
None => part.to_string().into(),
101+
};
102+
if let Some(last) = lines.last_mut() {
103+
last.spans.push(span);
104+
}
105+
}
106+
}
107+
108+
/// Convert a bash script into per-line styled content using tree-sitter's
109+
/// bash highlight query. The highlighter is streamed so multi-line content is
110+
/// split into `Line`s while preserving style boundaries.
111+
pub(crate) fn highlight_bash_to_lines(script: &str) -> Vec<Line<'static>> {
112+
let mut highlighter = Highlighter::new();
113+
let iterator =
114+
match highlighter.highlight(highlight_config(), script.as_bytes(), None, |_| None) {
115+
Ok(iter) => iter,
116+
Err(_) => return vec![script.to_string().into()],
117+
};
118+
58119
let mut lines: Vec<Line<'static>> = vec![Line::from("")];
59-
for sp in spans {
60-
let style = sp.style;
61-
let text = sp.content.into_owned();
62-
for (i, part) in text.split('\n').enumerate() {
63-
if i > 0 {
64-
lines.push(Line::from(""));
65-
}
66-
if part.is_empty() {
67-
continue;
120+
let mut highlight_stack: Vec<Highlight> = Vec::new();
121+
122+
for event in iterator {
123+
match event {
124+
Ok(HighlightEvent::HighlightStart(highlight)) => highlight_stack.push(highlight),
125+
Ok(HighlightEvent::HighlightEnd) => {
126+
highlight_stack.pop();
68127
}
69-
let span = Span {
70-
style,
71-
content: std::borrow::Cow::Owned(part.to_string()),
72-
};
73-
if let Some(last) = lines.last_mut() {
74-
last.spans.push(span);
128+
Ok(HighlightEvent::Source { start, end }) => {
129+
if start == end {
130+
continue;
131+
}
132+
let style = highlight_stack.last().map(|h| highlight_for(*h).style());
133+
push_segment(&mut lines, &script[start..end], style);
75134
}
135+
Err(_) => return vec![script.to_string().into()],
76136
}
77137
}
78-
lines
138+
139+
if lines.is_empty() {
140+
vec![Line::from("")]
141+
} else {
142+
lines
143+
}
79144
}
80145

81146
#[cfg(test)]
@@ -84,11 +149,8 @@ mod tests {
84149
use pretty_assertions::assert_eq;
85150
use ratatui::style::Modifier;
86151

87-
#[test]
88-
fn dims_expected_bash_operators() {
89-
let s = "echo foo && bar || baz | qux & (echo hi)";
90-
let lines = highlight_bash_to_lines(s);
91-
let reconstructed: String = lines
152+
fn reconstructed(lines: &[Line<'static>]) -> String {
153+
lines
92154
.iter()
93155
.map(|l| {
94156
l.spans
@@ -97,49 +159,78 @@ mod tests {
97159
.collect::<String>()
98160
})
99161
.collect::<Vec<_>>()
100-
.join("\n");
101-
assert_eq!(reconstructed, s);
162+
.join("\n")
163+
}
102164

103-
fn is_dim(span: &Span<'_>) -> bool {
104-
span.style.add_modifier.contains(Modifier::DIM)
105-
}
106-
let dimmed: Vec<String> = lines
165+
fn dimmed_tokens(lines: &[Line<'static>]) -> Vec<String> {
166+
lines
107167
.iter()
108168
.flat_map(|l| l.spans.iter())
109-
.filter(|sp| is_dim(sp))
169+
.filter(|sp| sp.style.add_modifier.contains(Modifier::DIM))
110170
.map(|sp| sp.content.clone().into_owned())
111-
.collect();
112-
assert_eq!(dimmed, vec!["&&", "||", "|", "&", "(", ")"]);
171+
.map(|token| token.trim().to_string())
172+
.filter(|token| !token.is_empty())
173+
.collect()
174+
}
175+
176+
#[test]
177+
fn dims_expected_bash_operators() {
178+
let s = "echo foo && bar || baz | qux & (echo hi)";
179+
let lines = highlight_bash_to_lines(s);
180+
assert_eq!(reconstructed(&lines), s);
181+
182+
let dimmed = dimmed_tokens(&lines);
183+
assert!(dimmed.contains(&"&&".to_string()));
184+
assert!(dimmed.contains(&"|".to_string()));
185+
assert!(!dimmed.contains(&"echo".to_string()));
113186
}
114187

115188
#[test]
116-
fn does_not_dim_quotes_but_dims_other_punct() {
189+
fn dims_redirects_and_strings() {
117190
let s = "echo \"hi\" > out.txt; echo 'ok'";
118191
let lines = highlight_bash_to_lines(s);
119-
let reconstructed: String = lines
120-
.iter()
121-
.map(|l| {
122-
l.spans
123-
.iter()
124-
.map(|sp| sp.content.clone())
125-
.collect::<String>()
126-
})
127-
.collect::<Vec<_>>()
128-
.join("\n");
129-
assert_eq!(reconstructed, s);
192+
assert_eq!(reconstructed(&lines), s);
130193

131-
fn is_dim(span: &Span<'_>) -> bool {
132-
span.style.add_modifier.contains(Modifier::DIM)
133-
}
134-
let dimmed: Vec<String> = lines
135-
.iter()
136-
.flat_map(|l| l.spans.iter())
137-
.filter(|sp| is_dim(sp))
138-
.map(|sp| sp.content.clone().into_owned())
139-
.collect();
194+
let dimmed = dimmed_tokens(&lines);
140195
assert!(dimmed.contains(&">".to_string()));
141-
assert!(dimmed.contains(&";".to_string()));
142-
assert!(!dimmed.contains(&"\"".to_string()));
143-
assert!(!dimmed.contains(&"'".to_string()));
196+
assert!(dimmed.contains(&"\"hi\"".to_string()));
197+
assert!(dimmed.contains(&"'ok'".to_string()));
198+
}
199+
200+
#[test]
201+
fn highlights_command_and_strings() {
202+
let s = "echo \"hi\"";
203+
let lines = highlight_bash_to_lines(s);
204+
let mut echo_style = None;
205+
let mut string_style = None;
206+
for span in &lines[0].spans {
207+
let text = span.content.as_ref();
208+
if text == "echo" {
209+
echo_style = Some(span.style);
210+
}
211+
if text == "\"hi\"" {
212+
string_style = Some(span.style);
213+
}
214+
}
215+
let echo_style = echo_style.expect("echo span missing");
216+
let string_style = string_style.expect("string span missing");
217+
assert!(echo_style.fg.is_none());
218+
assert!(!echo_style.add_modifier.contains(Modifier::DIM));
219+
assert!(string_style.add_modifier.contains(Modifier::DIM));
220+
}
221+
222+
#[test]
223+
fn highlights_heredoc_body_as_string() {
224+
let s = "cat <<EOF\nheredoc body\nEOF";
225+
let lines = highlight_bash_to_lines(s);
226+
let body_line = &lines[1];
227+
let mut body_style = None;
228+
for span in &body_line.spans {
229+
if span.content.as_ref() == "heredoc body" {
230+
body_style = Some(span.style);
231+
}
232+
}
233+
let body_style = body_style.expect("missing heredoc span");
234+
assert!(body_style.add_modifier.contains(Modifier::DIM));
144235
}
145236
}

0 commit comments

Comments
 (0)