Skip to content

Commit d9d0c0a

Browse files
Auto merge of #149944 - fasterthanlime:rustdoc-arborium, r=<try>
[WIP] rustdoc: Add tree-sitter syntax highlighting for non-Rust code blocks
2 parents 3f4dc1e + 58e1fec commit d9d0c0a

File tree

14 files changed

+540
-11
lines changed

14 files changed

+540
-11
lines changed

Cargo.lock

Lines changed: 261 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,243 @@ dependencies = [
170170
"object 0.37.3",
171171
]
172172

173+
[[package]]
174+
name = "arborium"
175+
version = "2.0.0"
176+
source = "registry+https://github.com/rust-lang/crates.io-index"
177+
checksum = "f00a72aec131b90c1c6495c565c4e4842fdbaf137ea4cacec110d353937ab99d"
178+
dependencies = [
179+
"arborium-bash",
180+
"arborium-c",
181+
"arborium-cpp",
182+
"arborium-css",
183+
"arborium-go",
184+
"arborium-highlight",
185+
"arborium-html",
186+
"arborium-java",
187+
"arborium-javascript",
188+
"arborium-json",
189+
"arborium-python",
190+
"arborium-ruby",
191+
"arborium-sql",
192+
"arborium-theme",
193+
"arborium-toml",
194+
"arborium-tree-sitter",
195+
"arborium-typescript",
196+
"arborium-yaml",
197+
"dlmalloc",
198+
]
199+
200+
[[package]]
201+
name = "arborium-bash"
202+
version = "2.1.0"
203+
source = "registry+https://github.com/rust-lang/crates.io-index"
204+
checksum = "34c6fd5230512b1191480100dd7876e972d1d6fd8e04fc62950a46b2a185405a"
205+
dependencies = [
206+
"arborium-sysroot",
207+
"cc",
208+
"tree-sitter-language",
209+
]
210+
211+
[[package]]
212+
name = "arborium-c"
213+
version = "2.1.0"
214+
source = "registry+https://github.com/rust-lang/crates.io-index"
215+
checksum = "6eb49d9d4d314d39e3ad41f891f3c46a417e21127c572621d3bb2b8acb0f67d2"
216+
dependencies = [
217+
"arborium-sysroot",
218+
"cc",
219+
"tree-sitter-language",
220+
]
221+
222+
[[package]]
223+
name = "arborium-cpp"
224+
version = "2.1.0"
225+
source = "registry+https://github.com/rust-lang/crates.io-index"
226+
checksum = "b3813b0fd9e61425fe387333eb77ff5a3ea890f66d89e1262a66372ecad1274f"
227+
dependencies = [
228+
"arborium-c",
229+
"arborium-sysroot",
230+
"cc",
231+
"tree-sitter-language",
232+
]
233+
234+
[[package]]
235+
name = "arborium-css"
236+
version = "2.1.0"
237+
source = "registry+https://github.com/rust-lang/crates.io-index"
238+
checksum = "5d95540ee6ea2c33f40b45d9c40283a5c396e0ceb8529c4f2151932e43858a3b"
239+
dependencies = [
240+
"arborium-sysroot",
241+
"cc",
242+
"tree-sitter-language",
243+
]
244+
245+
[[package]]
246+
name = "arborium-go"
247+
version = "2.1.0"
248+
source = "registry+https://github.com/rust-lang/crates.io-index"
249+
checksum = "1d7f8df9adca3da8c9e36889e0f52ab359dd36d168bc677e65fce5f43ca66b0d"
250+
dependencies = [
251+
"arborium-sysroot",
252+
"cc",
253+
"tree-sitter-language",
254+
]
255+
256+
[[package]]
257+
name = "arborium-highlight"
258+
version = "2.1.0"
259+
source = "registry+https://github.com/rust-lang/crates.io-index"
260+
checksum = "2f2df668f0c80bfa2e437f74d45a4922a0e9256c2476560200774be4b60686f3"
261+
dependencies = [
262+
"arborium-theme",
263+
"arborium-tree-sitter",
264+
"streaming-iterator",
265+
]
266+
267+
[[package]]
268+
name = "arborium-html"
269+
version = "2.0.0"
270+
source = "registry+https://github.com/rust-lang/crates.io-index"
271+
checksum = "a8d898910e534cddb0dca62ff3789b94637979b16d5354f153235438f58d29d6"
272+
dependencies = [
273+
"arborium-css",
274+
"arborium-javascript",
275+
"arborium-sysroot",
276+
"cc",
277+
"tree-sitter-language",
278+
]
279+
280+
[[package]]
281+
name = "arborium-java"
282+
version = "2.1.0"
283+
source = "registry+https://github.com/rust-lang/crates.io-index"
284+
checksum = "b91053783a4c3806cdcf92a7009bfe765ebcf0a13cd49ee361751a68f1f2c10f"
285+
dependencies = [
286+
"arborium-sysroot",
287+
"cc",
288+
"tree-sitter-language",
289+
]
290+
291+
[[package]]
292+
name = "arborium-javascript"
293+
version = "2.1.0"
294+
source = "registry+https://github.com/rust-lang/crates.io-index"
295+
checksum = "7220c71b1056508a028a4acfa4a10d8ca1713420ac3a36853dfd4a55bb335a4b"
296+
dependencies = [
297+
"arborium-sysroot",
298+
"cc",
299+
"tree-sitter-language",
300+
]
301+
302+
[[package]]
303+
name = "arborium-json"
304+
version = "2.1.0"
305+
source = "registry+https://github.com/rust-lang/crates.io-index"
306+
checksum = "775321ffff8b71819c6dfead9e62bcd1efc1616306e59f2e1317ba7bef282e1f"
307+
dependencies = [
308+
"arborium-sysroot",
309+
"cc",
310+
"tree-sitter-language",
311+
]
312+
313+
[[package]]
314+
name = "arborium-python"
315+
version = "2.1.0"
316+
source = "registry+https://github.com/rust-lang/crates.io-index"
317+
checksum = "45c5db8f67870cc64b67933a644fb296b3c367e9bfd85aa222a1ff6d49883e25"
318+
dependencies = [
319+
"arborium-sysroot",
320+
"cc",
321+
"tree-sitter-language",
322+
]
323+
324+
[[package]]
325+
name = "arborium-ruby"
326+
version = "2.1.0"
327+
source = "registry+https://github.com/rust-lang/crates.io-index"
328+
checksum = "7391c3561e43b892241ef216aafa974e1c3a59cd4e29d9cb9a7febc41645c0a8"
329+
dependencies = [
330+
"arborium-sysroot",
331+
"cc",
332+
"tree-sitter-language",
333+
]
334+
335+
[[package]]
336+
name = "arborium-sql"
337+
version = "2.1.0"
338+
source = "registry+https://github.com/rust-lang/crates.io-index"
339+
checksum = "dc2c5bd1ed383d8ed8f0269d0cfddb475af9c98580d43f76cc4bb85a6f052633"
340+
dependencies = [
341+
"arborium-sysroot",
342+
"cc",
343+
"tree-sitter-language",
344+
]
345+
346+
[[package]]
347+
name = "arborium-sysroot"
348+
version = "2.1.0"
349+
source = "registry+https://github.com/rust-lang/crates.io-index"
350+
checksum = "6d25c6fe8f35b7803048ca9f0846432011510d5196eb1089cf3a4bb37c35d094"
351+
dependencies = [
352+
"cc",
353+
"dlmalloc",
354+
]
355+
356+
[[package]]
357+
name = "arborium-theme"
358+
version = "2.1.0"
359+
source = "registry+https://github.com/rust-lang/crates.io-index"
360+
checksum = "9da38b2109b8af45b7e0bce0c96f7db1c17831a62a23ae586c5705efac635758"
361+
362+
[[package]]
363+
name = "arborium-toml"
364+
version = "2.1.0"
365+
source = "registry+https://github.com/rust-lang/crates.io-index"
366+
checksum = "b391b5bf276950b457d0b05efc0089c74b94e2f0939f8a63d98b4b84da5ebf12"
367+
dependencies = [
368+
"arborium-sysroot",
369+
"cc",
370+
"tree-sitter-language",
371+
]
372+
373+
[[package]]
374+
name = "arborium-tree-sitter"
375+
version = "2.1.0"
376+
source = "registry+https://github.com/rust-lang/crates.io-index"
377+
checksum = "936e30ab0ba24131c37823d6d087b4ab6b7d59c6dd26fd1f1470e50582dc07ba"
378+
dependencies = [
379+
"arborium-sysroot",
380+
"cc",
381+
"regex",
382+
"regex-syntax",
383+
"streaming-iterator",
384+
"tree-sitter-language",
385+
]
386+
387+
[[package]]
388+
name = "arborium-typescript"
389+
version = "2.1.0"
390+
source = "registry+https://github.com/rust-lang/crates.io-index"
391+
checksum = "9cd96fae6737d469b2042e2f7c37e37e6276465ba1c15162182e4106c189fc41"
392+
dependencies = [
393+
"arborium-javascript",
394+
"arborium-sysroot",
395+
"cc",
396+
"tree-sitter-language",
397+
]
398+
399+
[[package]]
400+
name = "arborium-yaml"
401+
version = "2.1.0"
402+
source = "registry+https://github.com/rust-lang/crates.io-index"
403+
checksum = "ef47437d2fe489ba8aa718dd1be3152b80b988b976183e84cfb1865a17cae1e8"
404+
dependencies = [
405+
"arborium-sysroot",
406+
"cc",
407+
"tree-sitter-language",
408+
]
409+
173410
[[package]]
174411
name = "arrayref"
175412
version = "0.3.9"
@@ -1265,6 +1502,17 @@ version = "1.0.10"
12651502
source = "registry+https://github.com/rust-lang/crates.io-index"
12661503
checksum = "8975ffdaa0ef3661bfe02dbdcc06c9f829dfafe6a3c474de366a8d5e44276921"
12671504

1505+
[[package]]
1506+
name = "dlmalloc"
1507+
version = "0.2.12"
1508+
source = "registry+https://github.com/rust-lang/crates.io-index"
1509+
checksum = "6738d2e996274e499bc7b0d693c858b7720b9cd2543a0643a3087e6cb0a4fa16"
1510+
dependencies = [
1511+
"cfg-if",
1512+
"libc",
1513+
"windows-sys 0.61.2",
1514+
]
1515+
12681516
[[package]]
12691517
name = "dyn-clone"
12701518
version = "1.0.20"
@@ -4861,6 +5109,7 @@ dependencies = [
48615109
name = "rustdoc"
48625110
version = "0.0.0"
48635111
dependencies = [
5112+
"arborium",
48645113
"arrayvec",
48655114
"askama",
48665115
"base64",
@@ -5329,6 +5578,12 @@ version = "1.1.0"
53295578
source = "registry+https://github.com/rust-lang/crates.io-index"
53305579
checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
53315580

5581+
[[package]]
5582+
name = "streaming-iterator"
5583+
version = "0.1.9"
5584+
source = "registry+https://github.com/rust-lang/crates.io-index"
5585+
checksum = "2b2231b7c3057d5e4ad0156fb3dc807d900806020c5ffa3ee6ff2c8c76fb8520"
5586+
53325587
[[package]]
53335588
name = "string_cache"
53345589
version = "0.8.9"
@@ -5851,6 +6106,12 @@ dependencies = [
58516106
"tracing-subscriber",
58526107
]
58536108

6109+
[[package]]
6110+
name = "tree-sitter-language"
6111+
version = "0.1.6"
6112+
source = "registry+https://github.com/rust-lang/crates.io-index"
6113+
checksum = "4ae62f7eae5eb549c71b76658648b72cc6111f2d87d24a1e31fa907f4943e3ce"
6114+
58546115
[[package]]
58556116
name = "twox-hash"
58566117
version = "1.6.3"

src/librustdoc/Cargo.toml

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,23 @@ path = "lib.rs"
99

1010
[dependencies]
1111
# tidy-alphabetical-start
12+
arborium = { version = "2.0.0", default-features = false, features = [
13+
"lang-bash",
14+
"lang-c",
15+
"lang-cpp",
16+
"lang-css",
17+
"lang-go",
18+
"lang-html",
19+
"lang-java",
20+
"lang-javascript",
21+
"lang-json",
22+
"lang-python",
23+
"lang-ruby",
24+
"lang-sql",
25+
"lang-toml",
26+
"lang-typescript",
27+
"lang-yaml",
28+
] }
1229
arrayvec = { version = "0.7", default-features = false }
1330
askama = { version = "0.14", default-features = false, features = ["alloc", "config", "derive"] }
1431
base64 = "0.21.7"

src/librustdoc/config.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -312,6 +312,8 @@ pub(crate) struct RenderOptions {
312312
pub(crate) disable_minification: bool,
313313
/// If `true`, HTML source pages will generate the possibility to expand macros.
314314
pub(crate) generate_macro_expansion: bool,
315+
/// If `true`, non-Rust code blocks will be syntax-highlighted using tree-sitter.
316+
pub(crate) highlight_foreign_code: bool,
315317
}
316318

317319
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
@@ -805,6 +807,7 @@ impl Options {
805807
let no_capture = matches.opt_present("no-capture");
806808
let generate_link_to_definition = matches.opt_present("generate-link-to-definition");
807809
let generate_macro_expansion = matches.opt_present("generate-macro-expansion");
810+
let highlight_foreign_code = matches.opt_present("highlight-foreign-code");
808811
let extern_html_root_takes_precedence =
809812
matches.opt_present("extern-html-root-takes-precedence");
810813
let html_no_source = matches.opt_present("html-no-source");
@@ -916,6 +919,7 @@ impl Options {
916919
include_parts_dir,
917920
parts_out_dir,
918921
disable_minification,
922+
highlight_foreign_code,
919923
};
920924
Some((input, options, render_options, loaded_paths))
921925
}

src/librustdoc/externalfiles.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ impl ExternalHtml {
4747
edition,
4848
playground,
4949
heading_offset: HeadingOffset::H2,
50+
highlight_foreign_code: false,
5051
}
5152
.write_into(&mut bc)
5253
.unwrap();
@@ -63,6 +64,7 @@ impl ExternalHtml {
6364
edition,
6465
playground,
6566
heading_offset: HeadingOffset::H2,
67+
highlight_foreign_code: false,
6668
}
6769
.write_into(&mut ac)
6870
.unwrap();

src/librustdoc/html/highlight.rs

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1475,5 +1475,31 @@ fn string_without_closing_tag<T: Display>(
14751475
}
14761476
}
14771477

1478+
/// Highlights non-Rust code using arborium (tree-sitter based).
1479+
/// Returns `None` if the language is not supported, in which case
1480+
/// the caller should fall back to plain escaped text.
1481+
pub(crate) fn highlight_foreign_code(lang: &str, code: &str) -> Option<String> {
1482+
use std::cell::RefCell;
1483+
1484+
thread_local! {
1485+
static HIGHLIGHTER: RefCell<arborium::Highlighter> =
1486+
RefCell::new(arborium::Highlighter::new());
1487+
}
1488+
1489+
// Map common language aliases to arborium grammar names
1490+
let lang = match lang {
1491+
"js" => "javascript",
1492+
"ts" => "typescript",
1493+
"py" => "python",
1494+
"rb" => "ruby",
1495+
"sh" | "shell" | "zsh" => "bash",
1496+
"yml" => "yaml",
1497+
"c++" | "cxx" => "cpp",
1498+
other => other,
1499+
};
1500+
1501+
HIGHLIGHTER.with_borrow_mut(|h| h.highlight(lang, code).ok())
1502+
}
1503+
14781504
#[cfg(test)]
14791505
mod tests;

0 commit comments

Comments
 (0)