Skip to content

Commit 413d3bf

Browse files
committed
Sped up file-extension handling in the language crate using constants and Aho-Corasick; probably a premature optimization, but it is faster.
1 parent 709b50b commit 413d3bf

File tree

22 files changed

+2665
-1179
lines changed

22 files changed

+2665
-1179
lines changed

.cargo/config.toml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
[unstable]
2+
codegen-backend = true
3+
4+
[profile.dev]
5+
codegen-backend = "cranelift"

Cargo.lock

Lines changed: 32 additions & 6 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
# SPDX-FileContributor: Adam Poulemanos <[email protected]>
33
#
44
# SPDX-License-Identifier: MIT OR Apache-2.0
5-
cargo-features = ["codegen-backend"]
65

76
# =========================================================
87
#* THREAD - Workspace
@@ -64,8 +63,8 @@ macro_rules_attribute = { version = "0.2.2" }
6463
async-trait = { version = "0.1.88" }
6564

6665
bit-set = { version = "0.8.0" }
67-
ignore = { version = "0.4.22" }
68-
regex = { version = "1.10" }
66+
ignore = { version = "0.4.23" }
67+
regex = { version = "1.11.1" }
6968
thiserror = { version = "2.0.12" }
7069

7170
# The center of it all

crates/ast-engine/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ matching = ["dep:regex"]
4444

4545
[dev-dependencies]
4646
tree-sitter-typescript = "0.23.2"
47-
thread-language = { workspace = true, features = ["builtin-parser"] }
47+
thread-language = { workspace = true, features = ["all-parsers"] }
4848
criterion = { version = "0.6.0", features = ["html_reports"] }
4949

5050
[[bench]]

crates/ast-engine/src/pinned.rs

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,11 @@ impl<T, D: Doc + 'static> PinnedNodeData<D, T> {
121121
where
122122
F: FnOnce(&'static Root<D>) -> T,
123123
{
124-
// TODO: explain why unsafe works here and what guarantee it needs
124+
// SAFETY: We're creating a 'static reference to `pin`, which is safe because:
125+
// 1. This struct owns `pin` and keeps it alive for the container's entire lifetime
126+
// 2. Tree-sitter nodes are heap-allocated with stable pointers
127+
// 3. We re-adopt nodes on each access to ensure they point to valid memory
128+
// The 'static lifetime is a "lie" to the type system: the nodes actually live only as long as this container.
125129
let reference = unsafe { &*(&raw const pin) as &'static Root<D> };
126130
let data = func(reference);
127131
Self { pin, data }
@@ -144,7 +148,9 @@ where
144148
}
145149

146150
/// # Safety
147-
/// TODO: explain unsafe trait
151+
/// This trait is unsafe because implementors must ensure that `visit_nodes` calls
152+
/// the provided function on all nodes contained within the data structure.
153+
/// Failure to do so will result in stale node pointers that may reference freed memory.
148154
pub unsafe trait NodeData<D: Doc> {
149155
type Data;
150156
fn get_data(&self) -> &Self::Data;

crates/ast-engine/src/replacer/template.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -363,6 +363,6 @@ if (true) {
363363

364364
#[test]
365365
fn test_nested_matching_replace() {
366-
// TODO impossible, we don't support nested replacement
366+
// TODO we don't support nested replacement yet
367367
}
368368
}

crates/language/Cargo.toml

Lines changed: 68 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ description = "Language definitions and parsers for Thread"
88
keywords = ["ast", "pattern", "codemod", "search", "rewrite", "languages"]
99
license = "AGPL-3.0-or-later AND MIT"
1010
readme = "README.md"
11-
categories = ["ast", "pattern", "codemod", "search", "rewrite",]
11+
categories = ["ast", "pattern", "codemod", "search", "rewrite"]
1212
version = "0.1.0"
1313
authors = [
1414
"Herrington Darkholme <[email protected]>",
@@ -24,14 +24,17 @@ include.workspace = true
2424
cc = "1.2.30"
2525

2626
[dependencies]
27-
thread-ast-engine = { workspace = true, features = ["parsing"] }
27+
thread-ast-engine = { workspace = true }
2828
thread-utils = { workspace = true, default-features = false, features = [
2929
"hashers",
3030
] }
31-
aho-corasick.workspace = true
32-
ignore.workspace = true
33-
serde.workspace = true
34-
tree-sitter.workspace = true
31+
32+
cfg-if = { workspace = true }
33+
34+
aho-corasick = { workspace = true }
35+
ignore = { workspace = true }
36+
serde = { workspace = true }
37+
tree-sitter = { workspace = true }
3538

3639
tree-sitter-bash = { version = "0.25.0", optional = true }
3740
tree-sitter-c = { version = "0.24.1", optional = true }
@@ -57,37 +60,41 @@ tree-sitter-typescript = { version = "0.23.2", optional = true }
5760
tree-sitter-yaml = { version = "0.7.1", optional = true }
5861

5962
[features]
60-
default = ["builtin-parser"]
61-
matching = ["thread-ast-engine/matching"]
63+
default = ["all-parsers", "tree-sitter-parsing"]
64+
tree-sitter-parsing = ["thread-ast-engine/parsing"]
65+
matching = ["thread-ast-engine/matching", "tree-sitter-parsing"]
66+
6267

6368
# Individual language features
64-
bash = ["tree-sitter-bash"]
65-
c = ["tree-sitter-c"]
66-
cpp = ["tree-sitter-cpp"]
67-
csharp = ["tree-sitter-c-sharp"]
68-
css = ["tree-sitter-css"]
69-
elixir = ["tree-sitter-elixir"]
70-
go = ["tree-sitter-go"]
71-
haskell = ["tree-sitter-haskell"]
72-
html = ["tree-sitter-html"]
73-
html-embedded = ["html", "css", "matching", "typescript", "tsx", "javascript"]
74-
java = ["tree-sitter-java"]
75-
javascript = ["tree-sitter-javascript"]
76-
json = ["tree-sitter-json"]
77-
kotlin = ["tree-sitter-kotlin"]
78-
lua = ["tree-sitter-lua"]
79-
php = ["tree-sitter-php"]
80-
python = ["tree-sitter-python"]
81-
ruby = ["tree-sitter-ruby"]
82-
rust = ["tree-sitter-rust"]
83-
scala = ["tree-sitter-scala"]
84-
swift = ["tree-sitter-swift"]
85-
typescript = ["tree-sitter-typescript"]
86-
tsx = ["tree-sitter-typescript"]
87-
yaml = ["tree-sitter-yaml"]
69+
bash = ["tree-sitter-bash", "tree-sitter-parsing"]
70+
c = ["tree-sitter-c", "tree-sitter-parsing"]
71+
cpp = ["tree-sitter-cpp", "tree-sitter-parsing"]
72+
csharp = ["tree-sitter-c-sharp", "tree-sitter-parsing"]
73+
css = ["tree-sitter-css", "tree-sitter-parsing"]
74+
elixir = ["tree-sitter-elixir", "tree-sitter-parsing"]
75+
go = ["tree-sitter-go", "tree-sitter-parsing"]
76+
haskell = ["tree-sitter-haskell", "tree-sitter-parsing"]
77+
html = ["tree-sitter-html", "tree-sitter-parsing"]
78+
html-embedded = ["html", "css", "matching", "typescript", "tsx", "javascript", "tree-sitter-parsing"]
79+
java = ["tree-sitter-java", "tree-sitter-parsing"]
80+
javascript = ["tree-sitter-javascript", "tree-sitter-parsing"]
81+
json = ["tree-sitter-json", "tree-sitter-parsing"]
82+
kotlin = ["tree-sitter-kotlin", "tree-sitter-parsing"]
83+
lua = ["tree-sitter-lua", "tree-sitter-parsing"]
84+
php = ["tree-sitter-php", "tree-sitter-parsing"]
85+
python = ["tree-sitter-python", "tree-sitter-parsing"]
86+
ruby = ["tree-sitter-ruby", "tree-sitter-parsing"]
87+
rust = ["tree-sitter-rust", "tree-sitter-parsing"]
88+
scala = ["tree-sitter-scala", "tree-sitter-parsing"]
89+
swift = ["tree-sitter-swift", "tree-sitter-parsing"]
90+
typescript = ["tree-sitter-typescript", "tree-sitter-parsing"]
91+
tsx = ["tree-sitter-typescript", "tree-sitter-parsing"]
92+
yaml = ["tree-sitter-yaml", "tree-sitter-parsing"]
8893

94+
#! Important: `all-parsers` feature enables all tree-sitter parsers
8995
# Language groups
90-
builtin-parser = [
96+
all-parsers = [
97+
"tree-sitter-parsing",
9198
"bash",
9299
"c",
93100
"cpp",
@@ -113,15 +120,31 @@ builtin-parser = [
113120
"tsx",
114121
"yaml",
115122
]
116-
napi-lang = [
117-
"css",
118-
"html",
119-
"html-embedded",
120-
"javascript",
121-
"typescript",
122-
"tsx",
123-
]
123+
124+
css-napi = ["tree-sitter-css", "napi-environment"]
125+
html-napi = ["tree-sitter-html", "napi-environment"]
126+
javascript-napi = ["tree-sitter-javascript", "napi-environment"]
127+
typescript-napi = ["tree-sitter-typescript", "napi-environment"]
128+
tsx-napi = ["tree-sitter-typescript", "napi-environment"]
129+
130+
napi-compatible = [
131+
"css-napi",
132+
"html-napi",
133+
"javascript-napi",
134+
"typescript-napi",
135+
"tsx-napi",
136+
]
137+
# =============== Important NAPI Information ============
138+
# Thread does **not** currently support NAPI builds.
139+
# We've included these NAPI features from our fork of `ast-grep-language` in case someone wants to build on them.
140+
# We may eventually add the capability, but it's not currently planned.
141+
# Pull requests welcome.
142+
#! Important: `napi-environment` feature disables the tree-sitter parser
143+
#! Tree-sitter cannot build for NAPI-WASM
144+
#! (NAPI-WASM here means Node-API builds compiled to WebAssembly for Node.js environments)
145+
napi-environment = []
124146
profiling = []
147+
no-enabled-langs = []
125148

126149
[dev-dependencies]
127150
criterion = { version = "0.6", features = ["html_reports"] }
@@ -130,3 +153,7 @@ thread-ast-engine = { workspace = true, features = ["matching", "parsing"] }
130153
[[bench]]
131154
name = "performance"
132155
harness = false
156+
157+
[[bench]]
158+
name = "extension_matching"
159+
harness = false

crates/language/README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -69,8 +69,8 @@ These languages accept `$` in identifiers and use standard pattern processing:
6969

7070
#### Parser Groups
7171

72-
- **`builtin-parser`** (default) - Includes all language parsers
73-
- **`napi-lang`** - Includes only NAPI-compatible (WASM for Node.js environments) parsers (CSS, HTML, JavaScript, TypeScript)
72+
- **`all-parsers`** (default) - Includes all language parsers
73+
- **`napi-compatible`** - Includes only NAPI-compatible (WASM for Node.js environments) parsers (CSS, HTML, JavaScript, TypeScript)
7474

7575
#### Individual Languages
7676

0 commit comments

Comments
 (0)