Perf option: ascii happy path

johanrd · johanrd · commit 4450f2fdeba6 · 2026-03-16T15:35:08.000+01:00
diff --git a/Cargo.toml b/Cargo.toml
@@ -47,6 +47,10 @@ features = [
     "fast-rng"  # Use a faster (but still sufficiently random) RNG
 ]
 
+[[bench]]
+name = "parse_bench"
+harness = false
+
 [dev-dependencies]
 difference = "2"
 regex = "1.11.1"
diff --git a/benches/parse_bench.rs b/benches/parse_bench.rs
@@ -0,0 +1,127 @@
+use content_tag::{Options, Preprocessor};
+use std::time::Instant;
+
+/// Measures `Preprocessor::parse` on `src`; returns the best time per parse in µs.
+///
+/// Does 100 untimed warmup parses, then three timed rounds of `iterations` parses;
+/// the fastest round is printed under the label `name` and returned.
+fn bench_parse(name: &str, src: &str, iterations: u32) -> f64 {
+    use std::hint::black_box;
+    // Warmup: untimed parses before any measurement starts.
+    for _ in 0..100 {
+        let p = Preprocessor::new();
+        let _ = black_box(p.parse(black_box(src), Options::default()));
+    }
+
+    // Run 3 rounds, take the minimum
+    let mut best_ns = f64::MAX;
+    for _ in 0..3 {
+        let start = Instant::now();
+        for _ in 0..iterations {
+            let p = Preprocessor::new();
+            // black_box: keep the optimizer from treating the unused result as dead work.
+            let _ = black_box(p.parse(black_box(src), Options::default()));
+        }
+        let per_iter_ns = start.elapsed().as_nanos() as f64 / f64::from(iterations);
+        best_ns = best_ns.min(per_iter_ns);
+    }
+
+    let best_us = best_ns / 1000.0;
+    // Count chars rather than `src.len()` (bytes) so the "chars" label is accurate.
+    println!("{:<35} {:>8.1}µs per parse  ({} chars)", name, best_us, src.chars().count());
+    best_us
+}
+
+/// Prints parse timings for three scenarios: ASCII sources holding 1 to 20
+/// templates (plus the marginal cost of each extra template), sources with
+/// multibyte characters, and two small typical `.gts` files.
+fn main() {
+    println!("=== Fine-grained scaling: 1 to 20 templates (ASCII) ===\n");
+
+    let chunk = r#"
+import Component from '@glimmer/component';
+class Comp extends Component {
+  <template>
+    <div class="container">
+      <h1>{{this.title}}</h1>
+      <p>{{this.description}}</p>
+    </div>
+  </template>
+}
+"#;
+
+    // (template count, best µs per parse) for each run; feeds the marginal-cost table.
+    let mut results = Vec::new();
+    for repeats in 1..=20 {
+        let src = chunk.repeat(repeats);
+        let us = bench_parse(
+            &format!("{:>2} templates ({:>4} chars)", repeats, src.len()),
+            &src,
+            3000,
+        );
+        results.push((repeats, us));
+    }
+
+    println!("\n=== Per-template marginal cost ===\n");
+    // Each window is (previous run, current run); their difference is the marginal cost.
+    for pair in results.windows(2) {
+        let (_, prev_us) = pair[0];
+        let (t, us) = pair[1];
+        println!(
+            "template {:>2}: +{:>5.1}µs marginal cost  ({:.1}µs total, {:.1}µs/template avg)",
+            t,
+            us - prev_us,
+            us,
+            us / t as f64
+        );
+    }
+
+    println!("\n=== Non-ASCII scaling: 1 to 10 templates ===\n");
+
+    let mb_chunk = "
+import Component from '@glimmer/component';
+class Comp extends Component {
+  <template>
+    <div class=\"container\">
+      <h1>{{this.title}} 🎉 中文</h1>
+      <p>{{this.description}}</p>
+    </div>
+  </template>
+}
+";
+
+    for repeats in [1, 2, 3, 5, 10] {
+        let src = mb_chunk.repeat(repeats);
+        // `src.len()` is bytes; count chars so the "chars" label holds for multibyte text.
+        let chars = src.chars().count();
+        let label = format!("{:>2} templates, multibyte ({:>4} chars)", repeats, chars);
+        bench_parse(&label, &src, 3000);
+    }
+
+    println!("\n=== Typical .gts files ===\n");
+
+    let small = r#"
+import Component from '@glimmer/component';
+export default class extends Component {
+  <template><div>{{this.title}}</div></template>
+}"#;
+
+    // A source file that contains no <template> tag at all.
+    let no_template = r#"
+import { tracked } from '@glimmer/tracking';
+import { action } from '@ember/object';
+import Service, { service } from '@ember/service';
+
+export default class AuthService extends Service {
+  @service declare session: any;
+  @tracked count = 0;
+
+  @action
+  increment() { this.count++; }
+
+  get doubled() { return this.count * 2; }
+}"#;
+
+    bench_parse("small component (1 template)", small, 5000);
+    bench_parse("utility file (no template)", no_template, 5000);
+}
diff --git a/src/lib.rs b/src/lib.rs
@@ -88,6 +88,7 @@ impl Preprocessor {
 
             let mut visitor = locate::LocateContentTagVisitor {
                 occurrences: Default::default(),
+                is_ascii: src.is_ascii(),
                 src: src.to_string(),
             };
 
diff --git a/src/locate.rs b/src/locate.rs
@@ -6,10 +6,11 @@ use swc_ecma_ast::{
 };
 use swc_ecma_visit::{Visit, VisitWith};
 
-#[derive(Default, Debug)]
+#[derive(Debug)]
 pub struct LocateContentTagVisitor {
     pub occurrences: Vec<Occurrence>,
     pub src: String,
+    pub is_ascii: bool,
 }
 
 #[derive(Eq, PartialEq, Debug, Serialize)]
@@ -32,10 +33,10 @@ impl LocateContentTagVisitor {
             kind,
             tag_name: "template".to_owned(),
             contents: contents.value.to_string(),
-            range: Range::new(&self.src, span),
-            start_range: Range::new(&self.src, &opening.span),
-            content_range: Range::new(&self.src, &contents.span),
-            end_range: Range::new(&self.src, &closing.span),
+            range: Range::new(&self.src, span, self.is_ascii),
+            start_range: Range::new(&self.src, &opening.span, self.is_ascii),
+            content_range: Range::new(&self.src, &contents.span, self.is_ascii),
+            end_range: Range::new(&self.src, &closing.span, self.is_ascii),
         };
 
         self.occurrences.push(occurrence);
@@ -108,14 +109,28 @@ pub struct Range {
     end_utf16_codepoint: usize,
 }
 impl Range {
-    pub fn new(src: &str, span: &Span) -> Range {
-        Range {
-            start_byte: span.lo.0 as usize - 1,
-            end_byte: span.hi.0 as usize - 1,
-            start_char: src[..span.lo.0 as usize - 1].chars().count(),
-            end_char: src[..span.hi.0 as usize - 1].chars().count(),
-            start_utf16_codepoint: src[..span.lo.0 as usize - 1].encode_utf16().count(),
-            end_utf16_codepoint: src[..span.hi.0 as usize - 1].encode_utf16().count(),
+    /// Builds a `Range` with the byte, char and UTF-16 offsets of `span` within `src`.
+    /// Pass `is_ascii = true` only when all of `src` is ASCII: the three offset kinds
+    /// are then identical, so the byte offsets are reused and `src` is not scanned.
+    pub fn new(src: &str, span: &Span, is_ascii: bool) -> Range {
+        // The `- 1` turns span positions into byte indices of `src`.
+        let start_byte = span.lo.0 as usize - 1;
+        let end_byte = span.hi.0 as usize - 1;
+        let (start_char, end_char, start_utf16_codepoint, end_utf16_codepoint) = if is_ascii {
+            (start_byte, end_byte, start_byte, end_byte)
+        } else {
+            // Scan the prefix once, then just the span (relies on lo <= hi), not the prefix twice.
+            let (head, body) = (&src[..start_byte], &src[start_byte..end_byte]);
+            let (chars, utf16) = (head.chars().count(), head.encode_utf16().count());
+            (chars, chars + body.chars().count(), utf16, utf16 + body.encode_utf16().count())
+        };
+        Range {
+            start_byte,
+            end_byte,
+            start_char,
+            end_char,
+            start_utf16_codepoint,
+            end_utf16_codepoint,
         }
     }
 }