Skip to content

Commit d92712e

Browse files
committed
Optimize: lazy load gpt-tokenizer to fix OpenCode v1.0.104+ crash
- Make the gpt-tokenizer import dynamic to avoid loading the 53 MB package during initialization
- Update estimateTokensBatch to be async with a lazy import
- Update calculateTokensSaved and all callers to handle async tokenization
- The plugin now loads instantly and only loads the tokenizer when it is first needed
- Fixes the crash in OpenCode v1.0.104+ caused by large eager imports
- Bump version to 0.3.2
1 parent 50fe090 commit d92712e

File tree

4 files changed

+17
-11
lines changed

4 files changed

+17
-11
lines changed

lib/janitor.ts

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -528,7 +528,7 @@ export class Janitor {
528528
/**
529529
* Helper function to calculate token savings from tool outputs
530530
*/
531-
private calculateTokensSaved(prunedIds: string[], toolOutputs: Map<string, string>): number {
531+
private async calculateTokensSaved(prunedIds: string[], toolOutputs: Map<string, string>): Promise<number> {
532532
const outputsToTokenize: string[] = []
533533

534534
for (const prunedId of prunedIds) {
@@ -539,8 +539,8 @@ export class Janitor {
539539
}
540540

541541
if (outputsToTokenize.length > 0) {
542-
// Use batch tokenization for efficiency
543-
const tokenCounts = estimateTokensBatch(outputsToTokenize, this.logger)
542+
// Use batch tokenization for efficiency (lazy loads gpt-tokenizer)
543+
const tokenCounts = await estimateTokensBatch(outputsToTokenize, this.logger)
544544
return tokenCounts.reduce((sum, count) => sum + count, 0)
545545
}
546546

@@ -593,7 +593,7 @@ export class Janitor {
593593
if (deduplicatedIds.length === 0) return
594594

595595
// Calculate token savings
596-
const tokensSaved = this.calculateTokensSaved(deduplicatedIds, toolOutputs)
596+
const tokensSaved = await this.calculateTokensSaved(deduplicatedIds, toolOutputs)
597597
const tokensFormatted = formatTokenCount(tokensSaved)
598598

599599
const toolText = deduplicatedIds.length === 1 ? 'tool' : 'tools'
@@ -647,7 +647,7 @@ export class Janitor {
647647

648648
// Calculate token savings
649649
const allPrunedIds = [...deduplicatedIds, ...llmPrunedIds]
650-
const tokensSaved = this.calculateTokensSaved(allPrunedIds, toolOutputs)
650+
const tokensSaved = await this.calculateTokensSaved(allPrunedIds, toolOutputs)
651651
const tokensFormatted = formatTokenCount(tokensSaved)
652652

653653
let message = `🧹 DCP: Saved ~${tokensFormatted} tokens (${totalPruned} tool${totalPruned > 1 ? 's' : ''} pruned)\n`

lib/tokenizer.ts

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,12 @@
44
* Uses gpt-tokenizer to provide token counts for text content.
55
* Works with any LLM provider - provides accurate counts for OpenAI models
66
* and reasonable approximations for other providers.
7+
*
8+
* NOTE: gpt-tokenizer is lazily imported to avoid loading the 53MB package
9+
* during plugin initialization. The package is only loaded when tokenization
10+
* is actually needed.
711
*/
812

9-
import { encode } from 'gpt-tokenizer'
1013
import type { Logger } from './logger'
1114

1215
/**
@@ -16,11 +19,14 @@ import type { Logger } from './logger'
1619
* @param logger - Optional logger instance
1720
* @returns Array of token counts
1821
*/
19-
export function estimateTokensBatch(
22+
export async function estimateTokensBatch(
2023
texts: string[],
2124
logger?: Logger
22-
): number[] {
25+
): Promise<number[]> {
2326
try {
27+
// Lazy import - only load the 53MB gpt-tokenizer package when actually needed
28+
const { encode } = await import('gpt-tokenizer')
29+
2430
const results = texts.map(text => {
2531
const tokens = encode(text)
2632
return tokens.length

package-lock.json

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
{
22
"$schema": "https://json.schemastore.org/package.json",
33
"name": "@tarquinen/opencode-dcp",
4-
"version": "0.3.1",
4+
"version": "0.3.2",
55
"type": "module",
66
"description": "OpenCode plugin that optimizes token usage by pruning obsolete tool outputs from conversation context",
77
"main": "./dist/index.js",

0 commit comments

Comments
 (0)