Skip to content

Commit 52c7adf

Browse files
committed
perf: exclude nodes that don't need to be queried
1 parent 71c7df6 commit 52c7adf

File tree

3 files changed

+75
-11
lines changed

3 files changed

+75
-11
lines changed

crates/biome_analyze/src/registry.rs

Lines changed: 34 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,23 @@
11
use crate::{
22
AddVisitor, AnalysisFilter, GroupCategory, QueryMatcher, Rule, RuleGroup, RuleKey,
3-
RuleMetadata, ServiceBag, SignalEntry, Visitor,
3+
RuleMetadata, ServiceBag, SignalEntry, SyntaxVisitor, Visitor,
44
context::RuleContext,
55
matcher::{GroupKey, MatchQueryParams},
66
query::{QueryKey, Queryable},
77
signals::RuleSignal,
88
};
99
use biome_diagnostics::Error;
10-
use biome_rowan::{AstNode, Language, RawSyntaxKind, SyntaxKind, SyntaxNode};
10+
use biome_rowan::{AstNode, Language, RawSyntaxKind, SyntaxKind, SyntaxKindSet, SyntaxNode};
1111
use rustc_hash::{FxHashMap, FxHashSet};
1212
use std::{
1313
any::TypeId,
1414
borrow,
1515
collections::{BTreeMap, BTreeSet},
1616
};
1717

18+
/// Number of phases supported by the [RuleRegistry].
19+
const PHASE_COUNT: usize = 2;
20+
1821
/// Defines all the phases that the [RuleRegistry] supports.
1922
#[repr(usize)]
2023
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
@@ -98,7 +101,7 @@ impl<L: Language> RegistryVisitor<L> for MetadataRegistry {
98101
/// after the "SemanticModel" is ready, which demands a whole traversal of the parsed tree.
99102
pub struct RuleRegistry<L: Language> {
100103
/// Holds a collection of rules for each phase.
101-
phase_rules: [PhaseRules<L>; 2],
104+
phase_rules: [PhaseRules<L>; PHASE_COUNT],
102105
}
103106

104107
impl<L: Language + Default> RuleRegistry<L> {
@@ -115,6 +118,7 @@ impl<L: Language + Default> RuleRegistry<L> {
115118
visitors: BTreeMap::default(),
116119
services: ServiceBag::default(),
117120
diagnostics: Vec::new(),
121+
syntax_kind_sets: [SyntaxKindSet::empty(); PHASE_COUNT],
118122
}
119123
}
120124
}
@@ -143,6 +147,10 @@ pub struct RuleRegistryBuilder<'a, L: Language> {
143147
// Service Bag
144148
services: ServiceBag,
145149
diagnostics: Vec<Error>,
150+
/// Per-phase union of all [SyntaxKindSet] values from registered rules.
151+
/// Used to create a filtered [SyntaxVisitor] that only emits query matches
152+
/// for node kinds that at least one rule cares about.
153+
syntax_kind_sets: [SyntaxKindSet<L>; PHASE_COUNT],
146154
}
147155

148156
impl<L: Language + Default + 'static> RegistryVisitor<L> for RuleRegistryBuilder<'_, L> {
@@ -167,13 +175,15 @@ impl<L: Language + Default + 'static> RegistryVisitor<L> for RuleRegistryBuilder
167175
return;
168176
}
169177

170-
let phase = R::phase() as usize;
171-
let phase = &mut self.registry.phase_rules[phase];
178+
let phase_index = R::phase() as usize;
179+
let phase = &mut self.registry.phase_rules[phase_index];
172180

173181
let rule = RegistryRule::new::<R>(phase.rule_states.len());
174182

175183
match <R::Query as Queryable>::key() {
176184
QueryKey::Syntax(key) => {
185+
self.syntax_kind_sets[phase_index] = self.syntax_kind_sets[phase_index].union(key);
186+
177187
let TypeRules::SyntaxRules { rules } = phase
178188
.type_rules
179189
.entry(TypeId::of::<SyntaxNode<L>>())
@@ -242,8 +252,25 @@ type BuilderResult<L> = (
242252
BTreeMap<(Phases, TypeId), Box<dyn Visitor<Language = L>>>,
243253
);
244254

245-
impl<L: Language> RuleRegistryBuilder<'_, L> {
246-
pub fn build(self) -> BuilderResult<L> {
255+
impl<L: Language + 'static> RuleRegistryBuilder<'_, L> {
256+
pub fn build(mut self) -> BuilderResult<L> {
257+
// Replace SyntaxVisitor instances with filtered versions that only
258+
// emit query matches for node kinds that at least one rule queries.
259+
// This avoids per-node Box::new() allocations for kinds no rule
260+
// cares about.
261+
let phases = [Phases::Syntax, Phases::Semantic];
262+
for (phase_index, &phase) in phases.iter().enumerate() {
263+
let key = (phase, TypeId::of::<SyntaxVisitor<L>>());
264+
if self.visitors.contains_key(&key) {
265+
self.visitors.insert(
266+
key,
267+
Box::new(SyntaxVisitor::with_kind_set(
268+
self.syntax_kind_sets[phase_index],
269+
)),
270+
);
271+
}
272+
}
273+
247274
(
248275
self.registry,
249276
self.services,

crates/biome_analyze/src/syntax.rs

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use biome_rowan::{AstNode, Language, SyntaxNode, WalkEvent};
1+
use biome_rowan::{AstNode, Language, SyntaxKindSet, SyntaxNode, WalkEvent};
22

33
use crate::{
44
AddVisitor, Phases, QueryKey, QueryMatch, Queryable, ServiceBag, Visitor, VisitorContext,
@@ -50,11 +50,31 @@ pub struct SyntaxVisitor<L: Language> {
5050
/// of that subtree. The visitor will then ignore all events until it
5151
/// receives a [WalkEvent::Leave] for the `skip_subtree` node
5252
skip_subtree: Option<SyntaxNode<L>>,
53+
/// Optional filter to restrict which [SyntaxKind] values are emitted as
54+
/// query matches. When [None], all nodes are emitted (unfiltered).
55+
/// When [Some], only nodes whose kind is in the set are emitted,
56+
/// avoiding unnecessary allocations for nodes no rule cares about.
57+
kind_set: Option<SyntaxKindSet<L>>,
58+
}
59+
60+
impl<L: Language> SyntaxVisitor<L> {
61+
/// Create a [SyntaxVisitor] that only emits query matches for nodes whose
62+
/// `SyntaxKind` is in the provided set. Nodes with kinds not in the set
63+
/// are skipped entirely, avoiding per-node allocation overhead.
64+
pub fn with_kind_set(kind_set: SyntaxKindSet<L>) -> Self {
65+
Self {
66+
skip_subtree: None,
67+
kind_set: Some(kind_set),
68+
}
69+
}
5370
}
5471

5572
impl<L: Language> Default for SyntaxVisitor<L> {
5673
fn default() -> Self {
57-
Self { skip_subtree: None }
74+
Self {
75+
skip_subtree: None,
76+
kind_set: None,
77+
}
5878
}
5979
}
6080

@@ -86,6 +106,12 @@ impl<L: Language + 'static> Visitor for SyntaxVisitor<L> {
86106
return;
87107
}
88108

109+
if let Some(kind_set) = self.kind_set
110+
&& !kind_set.matches(node.kind())
111+
{
112+
return;
113+
}
114+
89115
ctx.match_query(node.clone());
90116
}
91117
}

crates/biome_rowan/src/ast/mod.rs

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,19 +24,30 @@ use crate::{
2424
pub use batch::*;
2525
pub use mutation::{AstNodeExt, AstNodeListExt, AstSeparatedListExt};
2626

27+
/// Number of `u128` buckets used by [SyntaxKindSet] to represent the bitfield.
28+
/// This determines the maximum [RawSyntaxKind] value that can be stored:
29+
/// `SYNTAX_KIND_SET_BUCKETS * 128 - 1`. All language syntax kind enums must
30+
/// have their `__LAST` variant within this limit.
31+
pub const SYNTAX_KIND_SET_BUCKETS: usize = 5;
32+
2733
/// Represents a set of [SyntaxKind] as a bitfield, with each bit representing
2834
/// whether the corresponding [RawSyntaxKind] value is contained in the set
2935
///
3036
/// This is similar to the `TokenSet` struct in `biome_js_parser`, with the
3137
/// bitfield here being twice as large, since it needs to cover all nodes as well
3238
/// as all token kinds
3339
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
34-
pub struct SyntaxKindSet<L: Language>([u128; 5], PhantomData<L>);
40+
pub struct SyntaxKindSet<L: Language>([u128; SYNTAX_KIND_SET_BUCKETS], PhantomData<L>);
3541

3642
impl<L> SyntaxKindSet<L>
3743
where
3844
L: Language,
3945
{
46+
/// Create an empty [SyntaxKindSet] containing no [SyntaxKind] values
47+
pub const fn empty() -> Self {
48+
Self([0; SYNTAX_KIND_SET_BUCKETS], PhantomData)
49+
}
50+
4051
/// Create a new [SyntaxKindSet] containing only the provided [SyntaxKind]
4152
pub fn of(kind: L::Kind) -> Self {
4253
Self::from_raw(kind.to_raw())
@@ -60,7 +71,7 @@ where
6071
let shift = kind % u128::BITS as u16;
6172
let mask = 1 << shift;
6273

63-
let mut bits = [0; 5];
74+
let mut bits = [0; SYNTAX_KIND_SET_BUCKETS];
6475
bits[index] = mask;
6576

6677
Self(bits, PhantomData)

0 commit comments

Comments
 (0)