Merge pull request #3 from lockllm/compression

anthonyhalim150 · web-flow · commit 82f8f681d7d3 · 2026-02-28T00:53:54.000+07:00
Compression Feature
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,83 @@
 # Changelog
 
+## [1.3.0] - 2026-02-27
+
+### Added
+
+#### Prompt Compression
+Reduce token usage and costs by compressing prompts before sending them to AI providers. Three compression methods are available:
+
+- **`toon`** (Free) - Converts JSON data to a compact notation format, achieving 30-60% token savings on structured data. Only activates when the prompt starts with `{` or `[` (pure JSON). Non-JSON input is returned unchanged.
+- **`compact`** ($0.0001/use) - Advanced compression that intelligently reduces prompt length while preserving meaning. Works on any text type. Supports a configurable compression rate (0.3-0.7, default 0.5; lower = more aggressive compression).
+- **`combined`** ($0.0001/use) - Applies TOON first, then runs Compact on the result for maximum token reduction. For non-JSON input, behaves identically to `compact`. Best when you want maximum compression.
+
+Prompt compression is opt-in and disabled by default. Security scanning always runs on the original text before compression is applied.
+
+**Proxy mode:**
+```typescript
+// TOON - compress structured JSON prompts (free)
+const openai = createOpenAI({
+  apiKey: process.env.LOCKLLM_API_KEY,
+  proxyOptions: {
+    compressionAction: 'toon'
+  }
+});
+
+// Compact - compress any text with configurable rate
+const openai2 = createOpenAI({
+  apiKey: process.env.LOCKLLM_API_KEY,
+  proxyOptions: {
+    compressionAction: 'compact',
+    compressionRate: 0.4  // Lower = more aggressive compression (0.3-0.7, default: 0.5)
+  }
+});
+
+// Combined - TOON then Compact for maximum compression
+const openai3 = createOpenAI({
+  apiKey: process.env.LOCKLLM_API_KEY,
+  proxyOptions: {
+    compressionAction: 'combined',
+    compressionRate: 0.5
+  }
+});
+```
+
+**Scan API:**
+```typescript
+const result = await lockllm.scan(
+  { input: '{"users": [{"name": "Alice"}, {"name": "Bob"}]}' },
+  { compressionAction: 'combined', compressionRate: 0.5 }
+);
+
+if (result.compression_result) {
+  console.log(result.compression_result.method);            // 'combined'
+  console.log(result.compression_result.compressed_input);   // Compressed text
+  console.log(result.compression_result.compression_ratio);  // e.g., 0.35
+}
+```
+
+#### Compression Response Metadata
+Proxy responses now include compression metadata in response headers:
+- `X-LockLLM-Compression-Method` - Compression method used (`toon`, `compact`, or `combined`)
+- `X-LockLLM-Compression-Applied` - Whether compression was applied (`true` or `false`)
+- `X-LockLLM-Compression-Ratio` - Ratio of compressed to original length (lower = better)
+
+Parse these with `parseProxyMetadata()`:
+```typescript
+const metadata = parseProxyMetadata(response.headers);
+console.log(metadata.compression);
+// { method: 'combined', applied: true, ratio: 0.35 }
+```
+
+### Notes
+- Prompt compression is opt-in. Existing integrations continue to work without changes.
+- All new types (`CompressionAction`, `CompressionResult`) are fully exported for TypeScript users.
+- Security scanning always runs on the original (uncompressed) text for maximum protection.
+- TOON compression is free. Compact and Combined cost $0.0001 per request.
+- Compression results are cached for 30 minutes to avoid redundant processing.
+
+---
+
 ## [1.2.0] - 2026-02-21
 
 ### Added
@@ -134,14 +212,14 @@ const openai = createOpenAI({
     scanMode: 'combined',
     scanAction: 'block',
     policyAction: 'block',
-    routeAction: 'auto',        // Enable intelligent routing
+    routeAction: 'auto',        // Enable smart routing
     cacheResponse: true,         // Enable response caching
     cacheTTL: 3600               // Cache for 1 hour
   }
 });
 ```
 
-#### Intelligent Routing
+#### Smart Routing
 Let LockLLM automatically select the best model for each request based on task type and complexity. Set `routeAction: 'auto'` to enable, or `routeAction: 'custom'` to use your own routing rules from the dashboard.
 
 #### Response Caching
diff --git a/README.md b/README.md
@@ -81,8 +81,9 @@ LockLLM provides production-ready AI security that integrates seamlessly into yo
 | **Custom Endpoints** | Configure custom URLs for any provider (self-hosted, Azure, private clouds) |
 | **Custom Content Policies** | Define your own content rules in the dashboard and enforce them automatically across all providers |
 | **AI Abuse Detection** | Detect bot-generated content, repetition attacks, and resource exhaustion from your end-users |
-| **Intelligent Routing** | Automatically select the optimal model for each request based on task type and complexity to save costs |
+| **Smart Routing** | Automatically select the optimal model for each request based on task type and complexity to save costs |
 | **PII Detection & Redaction** | Detect and automatically redact emails, phone numbers, SSNs, credit cards, and other personal information before they reach AI providers |
+| **Prompt Compression** | Reduce token usage with TOON (JSON-to-compact-notation, free), Compact (advanced compression, $0.0001/use), or Combined (TOON then Compact for maximum reduction, $0.0001/use) methods |
 | **Response Caching** | Cache identical LLM responses to reduce costs and latency on repeated queries |
 | **Enterprise Privacy** | Provider keys encrypted at rest, prompts never stored |
 | **Production Ready** | Battle-tested with automatic retries, timeouts, and error handling |
@@ -625,6 +626,8 @@ interface ScanOptions {
   policyAction?: 'block' | 'allow_with_warning'; // Custom policy behavior
   abuseAction?: 'block' | 'allow_with_warning';  // Abuse detection (opt-in)
   piiAction?: 'strip' | 'block' | 'allow_with_warning'; // PII detection (opt-in)
+  compressionAction?: 'toon' | 'compact' | 'combined'; // Prompt compression (opt-in)
+  compressionRate?: number;                            // Compact/combined compression rate (0.3-0.7)
 }
 ```
 
@@ -657,10 +660,20 @@ interface ScanResponse {
   scan_warning?: ScanWarning;
   // Present when abuse detection is enabled and abuse found
   abuse_warnings?: AbuseWarning;
-  // Present when intelligent routing is enabled
+  // Present when smart routing is enabled
   routing?: { task_type: string; complexity: number; selected_model?: string; };
   // Present when PII detection is enabled
   pii_result?: PIIResult;
+  // Present when prompt compression is enabled
+  compression_result?: CompressionResult;
+}
+
+interface CompressionResult {
+  method: 'toon' | 'compact' | 'combined'; // Compression method used
+  compressed_input: string;                 // The compressed text
+  original_length: number;                  // Original text length
+  compressed_length: number;                // Compressed text length
+  compression_ratio: number;                // Ratio (compressed/original, lower = better)
 }
 
 interface PIIResult {
@@ -694,6 +707,8 @@ interface GenericClientConfig {
     policyAction?: 'block' | 'allow_with_warning';
     abuseAction?: 'block' | 'allow_with_warning' | null;
     piiAction?: 'strip' | 'block' | 'allow_with_warning' | null;
+    compressionAction?: 'toon' | 'compact' | 'combined' | null;
+    compressionRate?: number;
     routeAction?: 'disabled' | 'auto' | 'custom';
     sensitivity?: 'low' | 'medium' | 'high';
     cacheResponse?: boolean;
@@ -747,9 +762,10 @@ const headers = buildLockLLMHeaders({
   policyAction: 'allow_with_warning',
   abuseAction: 'block',
   piiAction: 'strip',
+  compressionAction: 'toon',
   routeAction: 'auto'
 });
-// Returns: { 'x-lockllm-scan-mode': 'combined', 'x-lockllm-pii-action': 'strip', ... }
+// Returns: { 'x-lockllm-scan-mode': 'combined', 'x-lockllm-compression': 'toon', ... }
 ```
 
 **Parse proxy response metadata:**
@@ -764,6 +780,7 @@ console.log(metadata.scan_mode);     // 'combined'
 console.log(metadata.cache_status);  // 'HIT' or 'MISS'
 console.log(metadata.routing);       // { task_type, complexity, selected_model, ... }
 console.log(metadata.pii_detected);  // { detected, entity_types, entity_count, action }
+console.log(metadata.compression);   // { method, applied, ratio }
 ```
 
 ## Error Types
@@ -872,16 +889,16 @@ LockLLM uses a 10-tier progressive system based on monthly usage. Higher tiers u
 
 | Tier | Max RPM | Monthly Spending Requirement |
 |------|---------|----------------------------|
-| **Tier 1** (Free) | 30 RPM | $0 |
-| **Tier 2** | 50 RPM | $10/month |
-| **Tier 3** | 100 RPM | $50/month |
-| **Tier 4** | 200 RPM | $100/month |
-| **Tier 5** | 500 RPM | $250/month |
-| **Tier 6** | 1,000 RPM | $500/month |
-| **Tier 7** | 2,000 RPM | $1,000/month |
-| **Tier 8** | 5,000 RPM | $3,000/month |
-| **Tier 9** | 10,000 RPM | $5,000/month |
-| **Tier 10** | 20,000 RPM | $10,000/month |
+| **Tier 1** (Free) | 300 RPM | $0 |
+| **Tier 2** | 500 RPM | $10/month |
+| **Tier 3** | 1,000 RPM | $50/month |
+| **Tier 4** | 2,000 RPM | $100/month |
+| **Tier 5** | 5,000 RPM | $250/month |
+| **Tier 6** | 10,000 RPM | $500/month |
+| **Tier 7** | 20,000 RPM | $1,000/month |
+| **Tier 8** | 50,000 RPM | $3,000/month |
+| **Tier 9** | 100,000 RPM | $5,000/month |
+| **Tier 10** | 200,000 RPM | $10,000/month |
 
 See [pricing](https://www.lockllm.com/pricing) for full tier details and free monthly credits.
 
@@ -938,7 +955,8 @@ const result = await lockllm.scan(
     scanAction: 'block',        // Block core injection attacks
     policyAction: 'allow_with_warning',  // Allow but warn on policy violations
     abuseAction: 'block',       // Enable abuse detection (opt-in)
-    piiAction: 'strip'          // Redact PII from input (opt-in)
+    piiAction: 'strip',         // Redact PII from input (opt-in)
+    compressionAction: 'combined' // Compress prompts (opt-in: 'toon' | 'compact' | 'combined')
   }
 );
 
@@ -951,7 +969,9 @@ const openai = createOpenAI({
     policyAction: 'block',          // Block policy violations
     abuseAction: 'allow_with_warning',  // Detect abuse, don't block
     piiAction: 'strip',             // Automatically redact PII
-    routeAction: 'auto'             // Enable intelligent routing
+    compressionAction: 'compact',   // Compress prompts (free: 'toon', paid: 'compact' | 'combined')
+    compressionRate: 0.5,           // Compression rate 0.3-0.7 (compact/combined only)
+    routeAction: 'auto'             // Enable smart routing
   }
 });
 ```
@@ -971,14 +991,17 @@ const openai = createOpenAI({
 - `policyAction` - Controls custom policy violations: `'block'` | `'allow_with_warning'`
 - `abuseAction` - Controls abuse detection (opt-in): `'block'` | `'allow_with_warning'` | `null`
 - `piiAction` - Controls PII detection (opt-in): `'strip'` | `'block'` | `'allow_with_warning'` | `null`
-- `routeAction` - Controls intelligent routing: `'disabled'` | `'auto'` | `'custom'`
+- `compressionAction` - Controls prompt compression (opt-in): `'toon'` | `'compact'` | `'combined'` | `null`
+- `compressionRate` - Compression rate for the `compact` and `combined` methods: `0.3` - `0.7` (default: `0.5`; lower = more compression)
+- `routeAction` - Controls smart routing: `'disabled'` | `'auto'` | `'custom'`
 
 **Default Behavior (no headers):**
 - Scan Mode: `combined` (check both core + policies)
 - Scan Action: `allow_with_warning` (detect but don't block)
 - Policy Action: `allow_with_warning` (detect but don't block)
 - Abuse Action: `null` (disabled, opt-in only)
 - PII Action: `null` (disabled, opt-in only)
+- Compression Action: `null` (disabled, opt-in only)
 - Route Action: `disabled` (no routing)
 
 See [examples/advanced-options.ts](examples/advanced-options.ts) for complete examples.
diff --git a/examples/advanced-options.ts b/examples/advanced-options.ts
@@ -87,7 +87,7 @@ async function proxyWithAdvancedOptions() {
       scanAction: 'block', // Block injection attacks
       policyAction: 'block', // Block policy violations
       abuseAction: 'allow_with_warning', // Detect abuse but don't block
-      routeAction: 'auto', // Enable intelligent routing
+      routeAction: 'auto', // Enable smart routing
     },
   });
 
@@ -139,7 +139,7 @@ async function defaultBehavior() {
    * - Scan Action: allow_with_warning (detect threats but don't block)
    * - Policy Action: allow_with_warning (detect violations but don't block)
    * - Abuse Action: null (abuse detection disabled, opt-in only)
-   * - Route Action: disabled (no intelligent routing)
+   * - Route Action: disabled (no smart routing)
    */
 
   try {
diff --git a/package.json b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@lockllm/sdk",
-  "version": "1.2.0",
+  "version": "1.3.0",
   "description": "Enterprise-grade AI security SDK providing real-time protection against prompt injection, jailbreaks, and adversarial attacks. Drop-in replacement for OpenAI, Anthropic, and 17+ providers with zero code changes. Includes REST API, proxy mode, browser extension, and webhook support. Free BYOK model with unlimited scanning.",
   "main": "./dist/index.js",
   "module": "./dist/index.mjs",
diff --git a/src/index.ts b/src/index.ts
@@ -34,6 +34,7 @@ export type {
   ScanAction,
   RouteAction,
   PIIAction,
+  CompressionAction,
   ProxyRequestOptions,
   ProxyResponseMetadata,
 } from './types/common';
@@ -47,6 +48,7 @@ export type {
   ScanWarning,
   AbuseWarning,
   PIIResult,
+  CompressionResult,
 } from './types/scan';
 
 export type {
diff --git a/src/scan.ts b/src/scan.ts
@@ -94,6 +94,16 @@ export class ScanClient {
       headers['x-lockllm-pii-action'] = options.piiAction;
     }
 
+    // Compression action: opt-in prompt compression (null/undefined means disabled)
+    if (options?.compressionAction !== undefined && options?.compressionAction !== null) {
+      headers['x-lockllm-compression'] = options.compressionAction;
+    }
+
+    // Compression rate (applies to the compact and combined methods)
+    if (options?.compressionRate !== undefined) {
+      headers['x-lockllm-compression-rate'] = String(options.compressionRate);
+    }
+
     // Build request body
     const body: Record<string, any> = {
       input: request.input,
diff --git a/src/types/common.ts b/src/types/common.ts
@@ -64,6 +64,9 @@ export type RouteAction = 'disabled' | 'auto' | 'custom';
 /** PII detection action (opt-in) */
 export type PIIAction = 'strip' | 'block' | 'allow_with_warning';
 
+/** Prompt compression method (opt-in) */
+export type CompressionAction = 'toon' | 'compact' | 'combined';
+
 /** Proxy request options with advanced headers */
 export interface ProxyRequestOptions extends RequestOptions {
   /** Scan mode (default: combined) - Check both core security and custom policies */
@@ -74,7 +77,7 @@ export interface ProxyRequestOptions extends RequestOptions {
   policyAction?: ScanAction;
   /** Abuse detection action (opt-in, default: null) - When null, abuse detection is disabled */
   abuseAction?: ScanAction | null;
-  /** Routing action (default: disabled) - No intelligent routing unless explicitly enabled */
+  /** Routing action (default: disabled) - No smart routing unless explicitly enabled */
   routeAction?: RouteAction;
   /** PII detection action (opt-in, default: null) - When null, PII detection is disabled */
   piiAction?: PIIAction | null;
@@ -84,6 +87,12 @@ export interface ProxyRequestOptions extends RequestOptions {
   cacheResponse?: boolean;
   /** Cache TTL in seconds (default: 3600) */
   cacheTTL?: number;
+  /** Prompt compression method (opt-in, default: null) - When null, compression is disabled.
+   *  "toon" converts JSON to compact notation (free). "compact" uses advanced compression ($0.0001/use).
+   *  "combined" applies TOON first then Compact for maximum compression ($0.0001/use). */
+  compressionAction?: CompressionAction | null;
+  /** Compression rate for the compact and combined methods (0.3-0.7, default: 0.5) - Lower = more compression */
+  compressionRate?: number;
 }
 
 /** Response metadata from proxy */
@@ -171,4 +180,10 @@ export interface ProxyResponseMetadata {
   policy_detail?: any;
   /** Decoded abuse detail (from base64 header) */
   abuse_detail?: any;
+  /** Compression metadata */
+  compression?: {
+    method: string;
+    applied: boolean;
+    ratio: number;
+  };
 }
diff --git a/src/types/scan.ts b/src/types/scan.ts
@@ -3,7 +3,7 @@
  */
 
 import type { ScanResult } from './errors';
-import type { PIIAction } from './common';
+import type { PIIAction, CompressionAction } from './common';
 
 export type Sensitivity = 'low' | 'medium' | 'high';
 
@@ -24,6 +24,20 @@ export interface ScanRequest {
   chunk?: boolean;
 }
 
+/** Compression result */
+export interface CompressionResult {
+  /** Compression method used */
+  method: 'toon' | 'compact' | 'combined';
+  /** Compressed text */
+  compressed_input: string;
+  /** Original text length */
+  original_length: number;
+  /** Compressed text length */
+  compressed_length: number;
+  /** Compression ratio (compressed/original, lower = better) */
+  compression_ratio: number;
+}
+
 /** PII detection result */
 export interface PIIResult {
   /** Whether PII was detected */
@@ -46,6 +60,12 @@ export interface ScanOptions {
   abuseAction?: ScanAction | null;
   /** PII detection action (opt-in, default: null) - When null, PII detection is disabled */
   piiAction?: PIIAction | null;
+  /** Prompt compression method (opt-in, default: null) - When null, compression is disabled.
+   *  "toon" converts JSON to compact notation (free). "compact" uses advanced compression ($0.0001/use).
+   *  "combined" applies TOON first then Compact for maximum compression ($0.0001/use). */
+  compressionAction?: CompressionAction | null;
+  /** Compression rate for the compact and combined methods (0.3-0.7, default: 0.5) - Lower = more compression */
+  compressionRate?: number;
   /** Custom headers to include in the request */
   headers?: Record<string, string>;
   /** Request timeout in milliseconds */
@@ -159,4 +179,6 @@ export interface ScanResponse {
   };
   /** PII detection result (present when PII detection is enabled) */
   pii_result?: PIIResult;
+  /** Compression result (present when compression is enabled) */
+  compression_result?: CompressionResult;
 }
diff --git a/src/utils/proxy-headers.ts b/src/utils/proxy-headers.ts
diff --git a/tests/compression-coverage.test.js b/tests/compression-coverage.test.js

Original file line number	Diff line number	Diff line change
`@@ -1,6 +1,6 @@`
`1`	`1`	`{`
`2`	`2`	`"name": "@lockllm/sdk",`
`3`		`- "version": "1.2.0",`
	`3`	`+ "version": "1.3.0",`
`4`	`4`	`"description": "Enterprise-grade AI security SDK providing real-time protection against prompt injection, jailbreaks, and adversarial attacks. Drop-in replacement for OpenAI, Anthropic, and 17+ providers with zero code changes. Includes REST API, proxy mode, browser extension, and webhook support. Free BYOK model with unlimited scanning.",`
`5`	`5`	`"main": "./dist/index.js",`
`6`	`6`	`"module": "./dist/index.mjs",`