more optimization

kleinesfilmroellchen · kleinesfilmroellchen · commit 69aba213d3c2 · 2025-12-13T18:51:14.000+01:00
diff --git a/sof-rs/src/main.rs b/sof-rs/src/main.rs
@@ -176,21 +176,36 @@ fn sof_main(code: impl AsRef<str>, path: &Path, library_path: impl Into<PathBuf>
 	let start_time = time::Instant::now();
 	let lexed = parser::lexer::lex(code)?;
 	debug!(target: "sof::lexer", "lexed: {lexed:#?}");
-	let parsed = Arc::new(parser::parse(lexed)?);
+	let mut parsed = Arc::new(parser::parse(lexed)?);
 	debug!(target: "sof::parser", "parsed: {parsed:#?}");
+	let optimizer_start_time = time::Instant::now();
+	optimizer::run_passes(&mut parsed);
+	let optimizer_end_time = time::Instant::now();
 	let metrics = run(parsed, path, &library_path.into())?;
 	let end_time = time::Instant::now();
 
+	let parse_time = optimizer_start_time - start_time;
+	let optimize_time = optimizer_end_time - optimizer_start_time;
+	let execution_time = end_time - optimizer_end_time;
+
 	info!(
 		"Performance metrics:
-total time:   {:>13.2}μs
-tokens run:   {:>10}
-time / token: {:>13.2?}μs
-calls:        {:>10}
-GC runs:      {:>10}",
+time:             {:>13.2}μs
+  * parser:       {:>13.2}μs
+  * optimizer:    {:>13.2}μs
+  * execution:    {:>13.2}μs
+exe time / token: {:>13.2}μs
+  * optimizer:    {:>13.2}μs
+tokens run:       {:>10}
+calls:            {:>10}
+GC runs:          {:>10}",
 		(end_time - start_time).as_nanos() as f64 / 1_000.,
+		parse_time.as_nanos() as f64 / 1_000.,
+		optimize_time.as_nanos() as f64 / 1_000.,
+		execution_time.as_nanos() as f64 / 1_000.,
+		(execution_time.as_nanos() as f64 / 1000.) / metrics.token_count as f64,
+		(optimize_time.as_nanos() as f64 / 1000.) / metrics.token_count as f64,
 		metrics.token_count,
-		(end_time - start_time).as_micros() as f64 / metrics.token_count as f64,
 		metrics.call_count,
 		metrics.gc_count,
 	);
diff --git a/sof-rs/src/optimizer.rs b/sof-rs/src/optimizer.rs
@@ -5,30 +5,103 @@ use std::sync::Arc;
 use log::{debug, trace};
 
 use crate::runtime::stackable::TokenVec;
-use crate::token::Token;
+use crate::token::{InnerToken, Literal, Token};
 
 /// Single optimization pass that can be executed on a token list.
 pub type Pass = fn(tokens: &mut Vec<Token>);
 
-pub static DEFAULT_PASSES: [Pass; 1] = [passes::combine_literal_pushes];
+pub static DEFAULT_PASSES: [Pass; 2] = [passes::combine_id_calls, passes::combine_literal_pushes];
 
 pub fn run_passes(tokens: &mut TokenVec) {
 	debug!("running optimizer on {} tokens…", tokens.len());
 	// TODO: super wasteful
 	let mut work_tokens = (**tokens).clone();
 	for pass in DEFAULT_PASSES {
 		pass(&mut work_tokens);
+		trace!("after pass {:?}: {work_tokens:#?}", pass);
 	}
-	trace!("after optimizer: {work_tokens:#?}");
 	*tokens = Arc::new(work_tokens);
 }
 
+/// Applies `pass` to the token list nested inside `token`, if `token` is a code block literal.
+///
+/// Every other kind of token is left untouched. Deeper nesting is covered when `pass` itself calls this function
+/// for the tokens it visits, as `combine_id_calls` does.
+#[inline]
+fn recurse_pass(pass: Pass, token: &mut Token) {
+	if let InnerToken::Literal(Literal::CodeBlock(cb)) = &mut token.inner {
+		// `make_mut` edits the block in place when this is the only reference to it and clones it only when it is
+		// shared, so other holders of the block never observe the change.
+		pass(Arc::make_mut(cb));
+	}
+}
+
 mod passes {
 	use log::{debug, trace};
 	use miette::SourceSpan;
 	use smallvec::SmallVec;
 
-	use crate::token::{InnerToken, Token};
+	use crate::optimizer::recurse_pass;
+	use crate::token::{Command, InnerToken, Literal, Token};
+
+	/// Optimization pass which fuses an identifier with the call tokens directly following it.
+	///
+	/// - identifier + one call becomes a single `LookupName` token,
+	/// - identifier + two calls becomes a single `CallName` token (any further calls are left as they are).
+	///
+	/// The replacement token's span reaches from the start of the identifier to the end of the last fused call. Code
+	/// blocks are processed recursively through [`recurse_pass`].
+	pub fn combine_id_calls(tokens: &mut Vec<Token>) {
+		/// Span of the token at `index`, if there is one and it is a call command.
+		fn call_span(tokens: &[Token], index: usize) -> Option<SourceSpan> {
+			match tokens.get(index) {
+				Some(Token { inner: InnerToken::Command(Command::Call), span }) => Some(*span),
+				_ => None,
+			}
+		}
+
+		debug!("combine_id_calls");
+
+		let mut idx = 0;
+		while idx < tokens.len() {
+			recurse_pass(combine_id_calls, &mut tokens[idx]);
+
+			let start_index = idx;
+			// Whether or not anything gets fused, scanning resumes at the token right after this position.
+			idx += 1;
+
+			let Token { inner: InnerToken::Literal(Literal::Identifier(id)), span: start_span } = &tokens[start_index]
+			else {
+				continue;
+			};
+			let Some(first_call) = call_span(tokens.as_slice(), start_index + 1) else {
+				// Nothing to optimize.
+				continue;
+			};
+
+			let (end_index, end_span, inner) = match call_span(tokens.as_slice(), start_index + 2) {
+				// Found only a single call token afterwards.
+				None => {
+					let end_index = start_index + 1;
+					trace!("combining lookup in range [{start_index}; {end_index}]");
+					(end_index, first_call, InnerToken::LookupName(id.clone()))
+				},
+				// Found two call tokens afterwards.
+				Some(second_call) => {
+					let end_index = start_index + 2;
+					trace!("combining name call in range [{start_index}; {end_index}]");
+					(end_index, second_call, InnerToken::CallName(id.clone()))
+				},
+			};
+
+			let span = SourceSpan::new(
+				start_span.offset().into(),
+				end_span.offset() + end_span.len() - start_span.offset(),
+			);
+			tokens.splice(start_index ..= end_index, [Token { inner, span }]);
+		}
+	}
 
 	/// Optimization pass which combines multiple literal tokens into a single token which pushes multiple literals at
 	/// once.
@@ -42,6 +115,8 @@ mod passes {
 
 		let mut idx = 0;
 		while idx < tokens.len() {
+			recurse_pass(combine_literal_pushes, &mut tokens[idx]);
+
 			let current = &tokens[idx];
 			match &current.inner {
 				InnerToken::Literals(literals) => {
diff --git a/sof-rs/src/runtime.rs b/sof-rs/src/runtime.rs
@@ -69,7 +69,7 @@ impl<'gc> Stack<'gc> {
 		// fast path for top nametable
 		match self.main[self.top_nametable] {
 			Stackable::Nametable(nt) => nt.borrow().lookup(name, span).ok(),
-			_ => None,
+			_ => unreachable!("missing nametable"),
 		}
 		.or_else(|| {
 			self.main.iter().rev().find_map(|stackable| match stackable {
diff --git a/sof-rs/src/runtime/interpreter.rs b/sof-rs/src/runtime/interpreter.rs
@@ -11,6 +11,7 @@ use miette::SourceSpan;
 use smallvec::{SmallVec, smallvec};
 
 use crate::arc_iter::ArcVecIter;
+use crate::call_builtin_function;
 use crate::error::Error;
 use crate::identifier::Identifier;
 use crate::lib::DEFAULT_REGISTRY;
@@ -21,7 +22,6 @@ use crate::runtime::stackable::{BuiltinType, Function, TokenVec};
 use crate::runtime::util::{SwitchCase, SwitchCases, UtilityData};
 use crate::runtime::{Stack, StackArena, Stackable};
 use crate::token::{Command, InnerToken, Literal, Token};
-use crate::{call_builtin_function, optimizer};
 
 #[derive(Default, Clone, Copy)]
 #[non_exhaustive]
@@ -32,9 +32,8 @@ pub struct Metrics {
 	pub call_count:  usize,
 }
 
+/// Executes `tokens` on a newly created arena and returns the result of `run_on_arena`.
+///
+/// The tokens are executed exactly as passed in: this function does not run the optimizer itself, so callers that
+/// want optimized code call `optimizer::run_passes` on the tokens first.
-pub fn run(mut tokens: TokenVec, file_path: impl Into<PathBuf>, library_path: &Path) -> Result<Metrics, Error> {
+pub fn run(tokens: TokenVec, file_path: impl Into<PathBuf>, library_path: &Path) -> Result<Metrics, Error> {
 	let mut arena: StackArena = new_arena(library_path);
-	optimizer::run_passes(&mut tokens);
 	run_on_arena(&mut arena, tokens, file_path, library_path)
 }
 
@@ -502,6 +501,15 @@ fn execute_token<'a>(token: &Token, mc: &Mutation<'a>, stack: &mut Stack<'a>) ->
 			let callable = stack.pop(token.span)?;
 			callable.enter_call(mc, stack, token.span)
 		},
+		InnerToken::CallName(id) => {
+			let value = stack.lookup(id, token.span)?;
+			value.enter_call(mc, stack, token.span)
+		},
+		InnerToken::LookupName(id) => {
+			let value = stack.lookup(id, token.span)?;
+			stack.push(value);
+			no_action()
+		},
 		InnerToken::Command(Command::FieldAccess) => {
 			let callable = stack.pop(token.span)?;
 			let object = stack.pop(token.span)?;
@@ -642,11 +650,13 @@ fn execute_token<'a>(token: &Token, mc: &Mutation<'a>, stack: &mut Stack<'a>) ->
 			// insert the while body logic at the start so it is executed after the initial loop body action(s)
 			// the body action is *empty* so the return behavior is immediately run and it figures out the state of
 			// things from the utility stack setup
-			actions.insert(0, InterpreterAction::ExecuteCall {
-				#[allow(clippy::borrow_interior_mutable_const)]
-				code:                                                 EMPTY_VEC.clone(),
-				return_behavior:                                      CallReturnBehavior::Loop,
-			}).expect("interpreter actions grew too large");
+			actions
+				.insert(0, InterpreterAction::ExecuteCall {
+					#[allow(clippy::borrow_interior_mutable_const)]
+					code:                                                 EMPTY_VEC.clone(),
+					return_behavior:                                      CallReturnBehavior::Loop,
+				})
+				.expect("interpreter actions grew too large");
 			Ok(actions)
 		},
 		// almost like if, but with the special utility stack
diff --git a/sof-rs/src/runtime/module.rs b/sof-rs/src/runtime/module.rs
@@ -8,6 +8,7 @@ use internment::ArcIntern;
 use log::debug;
 
 use crate::error::Error;
+use crate::optimizer;
 use crate::parser::{self, lexer};
 use crate::runtime::stackable::TokenVec;
 
@@ -59,7 +60,8 @@ impl ModuleRegistry {
 		let code = std::fs::read_to_string(&*module_path)
 			.map_err(|err| Error::ModuleFileNotReadable { path: module_path.to_path_buf(), inner: err })?;
 		let lexed = lexer::lex(code)?;
-		let parsed = Arc::new(parser::parse(lexed)?);
+		let mut parsed = Arc::new(parser::parse(lexed)?);
+		optimizer::run_passes(&mut parsed);
 
 		self.parsed_modules.insert(module_path.clone(), parsed.clone());
 		Ok((module_path, parsed))
diff --git a/sof-rs/src/token.rs b/sof-rs/src/token.rs
@@ -1,8 +1,8 @@
 use std::cmp::Ordering;
 use std::fmt::{Debug, Display};
 
-use lean_string::LeanString;
 use gc_arena::{Gc, Mutation};
+use lean_string::LeanString;
 use miette::SourceSpan;
 
 use crate::identifier::Identifier;
@@ -21,6 +21,10 @@ pub enum InnerToken {
 	// since they are the most common case, this saves ~10% runtime.
 	Literal(Literal),
 	Literals(smallvec::SmallVec<[Literal; 3]>),
+
+	// optimizer-generated tokens
+	LookupName(Identifier),
+	CallName(Identifier),
 }
 
 /// Nonrecursive literals.
@@ -249,6 +253,8 @@ impl Debug for Token {
 			InnerToken::Literal(arg0) => f.debug_list().entry(arg0).finish(),
 			InnerToken::WhileBody => f.debug_tuple("WhileBody").finish(),
 			InnerToken::SwitchBody => f.debug_tuple("SwitchBody").finish(),
+			InnerToken::CallName(name) => f.debug_tuple("Call").field(name).finish(),
+			InnerToken::LookupName(name) => f.debug_tuple("Lookup").field(name).finish(),
 		}?;
 		write!(f, ", {:?} }}", (self.span.offset(), self.span.len()))
 	}

Original file line number	Diff line number	Diff line change
`@@ -69,7 +69,7 @@ impl<'gc> Stack<'gc> {`
`69`	`69`	`// fast path for top nametable`
`70`	`70`	`match self.main[self.top_nametable] {`
`71`	`71`	`Stackable::Nametable(nt) => nt.borrow().lookup(name, span).ok(),`
`72`		`- _ => None,`
	`72`	`+ _ => unreachable!("missing nametable"),`
`73`	`73`	`}`
`74`	`74`	`.or_else(\|\| {`
`75`	`75`	`self.main.iter().rev().find_map(\|stackable\| match stackable {`