Skip to content

Commit 4e89381

Browse files
committed
⚡️ Improve token optimization efficiency
Enhance the token optimization process across commit service and token optimizer modules. - Integrate `TokenOptimizer` in commit service to handle token limits effectively when generating AI prompts. - Add logging to track token usage and optimization stages, ensuring clarity and easier debugging. - Update configuration handling to account for token limits in provider configs and ensure efficient resource usage. - Modifications include refining and truncating strings based on available token budget, prioritizing diffs, commits, and file contents accordingly. This streamlines token budgeting, making AI prompt generation more efficient and scalable.
1 parent b4a45bc commit 4e89381

File tree

4 files changed

+111
-10
lines changed

4 files changed

+111
-10
lines changed

src/commit/service.rs

Lines changed: 57 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,9 @@ use crate::config::Config;
88
use crate::context::{CommitContext, GeneratedMessage};
99
use crate::git::{CommitResult, GitRepo};
1010
use crate::llm;
11-
use crate::llm_providers::LLMProviderType;
11+
use crate::llm_providers::{get_provider_metadata, LLMProviderType};
12+
use crate::log_debug;
13+
use crate::token_optimizer::TokenOptimizer;
1214

1315
/// Service for handling Git commit operations with AI assistance
1416
pub struct IrisCommitService {
@@ -93,16 +95,68 @@ impl IrisCommitService {
9395
config_clone.instruction_preset = preset.to_string();
9496
config_clone.instructions = instructions.to_string();
9597

96-
let context = self.get_git_info().await?;
98+
let mut context = self.get_git_info().await?;
9799

100+
// Get the token limit from the provider config
101+
let token_limit = config_clone
102+
.providers
103+
.get(&self.provider_type.to_string())
104+
.and_then(|p| p.token_limit)
105+
.unwrap_or_else(|| get_provider_metadata(&self.provider_type).default_token_limit);
106+
107+
// Create system prompt first to know its token count
98108
let system_prompt = create_system_prompt(&config_clone)?;
109+
110+
// Create a token optimizer to count tokens
111+
let optimizer = TokenOptimizer::new(token_limit);
112+
let system_tokens = optimizer.count_tokens(&system_prompt);
113+
114+
log_debug!("Token limit: {}", token_limit);
115+
log_debug!("System prompt tokens: {}", system_tokens);
116+
117+
// Reserve tokens for system prompt and some buffer for formatting
118+
let context_token_limit = token_limit.saturating_sub(system_tokens + 1000); // 1000 token buffer for safety
119+
log_debug!("Available tokens for context: {}", context_token_limit);
120+
121+
// Count tokens before optimization
122+
let user_prompt_before = create_user_prompt(&context);
123+
let total_tokens_before = system_tokens + optimizer.count_tokens(&user_prompt_before);
124+
log_debug!("Total tokens before optimization: {}", total_tokens_before);
125+
126+
// Optimize the context with remaining token budget
127+
context.optimize(context_token_limit);
128+
99129
let user_prompt = create_user_prompt(&context);
130+
let user_tokens = optimizer.count_tokens(&user_prompt);
131+
let total_tokens = system_tokens + user_tokens;
132+
133+
log_debug!("User prompt tokens after optimization: {}", user_tokens);
134+
log_debug!("Total tokens after optimization: {}", total_tokens);
135+
136+
// If we're still over the limit, truncate the user prompt directly
137+
let final_user_prompt = if total_tokens > token_limit {
138+
log_debug!(
139+
"Total tokens {} still exceeds limit {}, truncating user prompt",
140+
total_tokens,
141+
token_limit
142+
);
143+
let max_user_tokens = token_limit.saturating_sub(system_tokens + 100); // 100 token safety buffer
144+
optimizer.truncate_string(&user_prompt, max_user_tokens)
145+
} else {
146+
user_prompt
147+
};
148+
149+
let final_tokens = system_tokens + optimizer.count_tokens(&final_user_prompt);
150+
log_debug!(
151+
"Final total tokens after potential truncation: {}",
152+
final_tokens
153+
);
100154

101155
let mut generated_message = llm::get_refined_message::<GeneratedMessage>(
102156
&config_clone,
103157
&self.provider_type,
104158
&system_prompt,
105-
&user_prompt,
159+
&final_user_prompt,
106160
)
107161
.await?;
108162

src/config.rs

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -247,10 +247,17 @@ impl ProviderConfig {
247247

248248
/// Convert to `LLMProviderConfig`
249249
pub fn to_llm_provider_config(&self) -> LLMProviderConfig {
250+
let mut additional_params = self.additional_params.clone();
251+
252+
// Add token limit to additional params if set
253+
if let Some(limit) = self.token_limit {
254+
additional_params.insert("token_limit".to_string(), limit.to_string());
255+
}
256+
250257
LLMProviderConfig {
251258
api_key: self.api_key.clone(),
252259
model: self.model.clone(),
253-
additional_params: self.additional_params.clone(),
260+
additional_params,
254261
}
255262
}
256263
}

src/git.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -291,6 +291,7 @@ impl GitRepo {
291291
if Self::is_binary_diff(&diff_string) {
292292
Ok("[Binary file changed]".to_string())
293293
} else {
294+
log_debug!("Generated diff for {} ({} bytes)", path, diff_string.len());
294295
Ok(diff_string)
295296
}
296297
}

src/token_optimizer.rs

Lines changed: 45 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
use crate::context::CommitContext;
2+
use crate::log_debug;
23
use tiktoken_rs::cl100k_base;
34

45
pub struct TokenOptimizer {
@@ -17,19 +18,32 @@ impl TokenOptimizer {
1718

1819
pub fn optimize_context(&self, context: &mut CommitContext) {
1920
let mut remaining_tokens = self.max_tokens;
21+
let mut total_tokens = 0;
2022

2123
// Step 1: Allocate tokens for the diffs (highest priority)
2224
for file in &mut context.staged_files {
2325
let diff_tokens = self.count_tokens(&file.diff);
24-
if diff_tokens > remaining_tokens {
26+
if total_tokens + diff_tokens > self.max_tokens {
27+
log_debug!(
28+
"Truncating diff for {} from {} tokens to {} tokens",
29+
file.path,
30+
diff_tokens,
31+
remaining_tokens
32+
);
2533
file.diff = self.truncate_string(&file.diff, remaining_tokens);
34+
total_tokens += remaining_tokens;
2635
remaining_tokens = 0;
2736
} else {
28-
remaining_tokens = remaining_tokens.saturating_sub(diff_tokens);
37+
total_tokens += diff_tokens;
38+
remaining_tokens = self.max_tokens.saturating_sub(total_tokens);
2939
}
3040

3141
if remaining_tokens == 0 {
3242
// If we exhaust the tokens in step 1, clear commits and contents
43+
log_debug!(
44+
"Token budget exhausted after diffs (total: {}), clearing commits and contents",
45+
total_tokens
46+
);
3347
Self::clear_commits_and_contents(context);
3448
return;
3549
}
@@ -38,15 +52,26 @@ impl TokenOptimizer {
3852
// Step 2: Allocate remaining tokens for recent commits (medium priority)
3953
for commit in &mut context.recent_commits {
4054
let commit_tokens = self.count_tokens(&commit.message);
41-
if commit_tokens > remaining_tokens {
55+
if total_tokens + commit_tokens > self.max_tokens {
56+
log_debug!(
57+
"Truncating commit message from {} tokens to {} tokens",
58+
commit_tokens,
59+
remaining_tokens
60+
);
4261
commit.message = self.truncate_string(&commit.message, remaining_tokens);
62+
total_tokens += remaining_tokens;
4363
remaining_tokens = 0;
4464
} else {
45-
remaining_tokens = remaining_tokens.saturating_sub(commit_tokens);
65+
total_tokens += commit_tokens;
66+
remaining_tokens = self.max_tokens.saturating_sub(total_tokens);
4667
}
4768

4869
if remaining_tokens == 0 {
4970
// If we exhaust the tokens in step 2, clear contents
71+
log_debug!(
72+
"Token budget exhausted after commits (total: {}), clearing contents",
73+
total_tokens
74+
);
5075
Self::clear_contents(context);
5176
return;
5277
}
@@ -56,18 +81,32 @@ impl TokenOptimizer {
5681
for file in &mut context.staged_files {
5782
if let Some(content) = &mut file.content {
5883
let content_tokens = self.count_tokens(content);
59-
if content_tokens > remaining_tokens {
84+
if total_tokens + content_tokens > self.max_tokens {
85+
log_debug!(
86+
"Truncating file content for {} from {} tokens to {} tokens",
87+
file.path,
88+
content_tokens,
89+
remaining_tokens
90+
);
6091
*content = self.truncate_string(content, remaining_tokens);
92+
total_tokens += remaining_tokens;
6193
remaining_tokens = 0;
6294
} else {
63-
remaining_tokens = remaining_tokens.saturating_sub(content_tokens);
95+
total_tokens += content_tokens;
96+
remaining_tokens = self.max_tokens.saturating_sub(total_tokens);
6497
}
6598

6699
if remaining_tokens == 0 {
100+
log_debug!(
101+
"Token budget exhausted after file contents (total: {})",
102+
total_tokens
103+
);
67104
return; // Exit early if we've exhausted the token budget
68105
}
69106
}
70107
}
108+
109+
log_debug!("Final token count after optimization: {}", total_tokens);
71110
}
72111

73112
// Truncate a string to fit within the specified token limit

0 commit comments

Comments
 (0)