diff --git a/intro/intro.js b/intro/intro.js
index 0649ad0..5d6d938 100644
--- a/intro/intro.js
+++ b/intro/intro.js
@@ -14,7 +14,7 @@ const model = await llama.loadModel({
         __dirname,
         "../",
         "models",
-        "Qwen3-1.7B-Q8_0.gguf"
+        "hf_Qwen_Qwen3-1.7B.Q8_0.gguf"
     )
 });
diff --git a/logs/qwen_prompts_2025-11-03T13-00-39-159Z.txt b/logs/qwen_prompts_2025-11-03T13-00-39-159Z.txt
new file mode 100644
index 0000000..f80166b
--- /dev/null
+++ b/logs/qwen_prompts_2025-11-03T13-00-39-159Z.txt
@@ -0,0 +1,46 @@

========== PROMPT DEBUG OUTPUT ==========
Timestamp: 2025-11-03T13:00:39.158Z
Token Count: 518

=== CONTEXT STATE ===
<|im_start|>system
You are a professional chronologist who standardizes time representations across different systems.

Always convert times from 12-hour format (e.g., "1:46:36 PM") to 24-hour format (e.g., "13:46") without seconds
before returning them.

# Tools

You may call one or more functions to assist with the user query.

You are provided with function signatures within <tools></tools> XML tags:
<tools>
{"type": "function", "function": {"name": "getCurrentTime", "description": "Get the current time", "parameters": {"type": "object", "properties": {}}}}
</tools>

For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:
<tool_call>
{"name": <function-name>, "arguments": <args-json-object>}
</tool_call><|im_end|>
<|im_start|>user
What time is it right now?<|im_end|>
<|im_start|>assistant
<think>
Okay, the user is asking, "What time is it right now?" I need to figure out the current time. The tools provided include a function called getCurrentTime, which doesn't require any parameters. Since the user wants the current time in 24-hour format, I should call this function to get the current time. The function will return the time, which I can then convert to the 24-hour format as specified. I don't need to handle any other functions here because there's no additional information or parameters required. So, I'll call getCurrentTime and then format the result properly.
</think>

<tool_call>
{"name": "getCurrentTime", "arguments": {}}
</tool_call><|im_end|>
<|im_start|>user
<tool_response>
"6:30:36 pm"
</tool_response><|im_end|>
<|im_start|>assistant
<think>
Okay, the user asked for the current time, and I called the getCurrentTime function. The response from the tool was "6:30:36 pm". Now I need to convert that to 24-hour format. Let me think. In 12-hour format, "pm" stands for "postmeridian", so 6:30:36 PM would be 18:30 in 24-hour time. The seconds part is optional in 24-hour format, so I'll remove the seconds. Therefore, the converted time is 18:30. I should present this as the final answer.
</think>

The current time is 18:30.
==========================================
diff --git a/simple-agent-with-retry/CODE.md b/simple-agent-with-retry/CODE.md
new file mode 100644
index 0000000..d8c3235
--- /dev/null
+++ b/simple-agent-with-retry/CODE.md
@@ -0,0 +1,186 @@
# Code Explanation: simple-agent-with-retry.js

This file extends the basic `simple-agent` example by integrating a small retry helper (`retryWithBackoff`) to make tool calls and session prompts more resilient to transient failures. The file demonstrates two patterns:

- Wrapping a flaky tool implementation (`getCurrentTime`) with `retryWithBackoff` so transient errors are retried transparently.
- Wrapping a high-level `session.prompt(...)` call with the same helper when the whole LLM interaction may be unreliable.

Below is a step-by-step explanation mirroring the code structure.

## 1. Imports and setup
```javascript
import {defineChatSessionFunction, getLlama, LlamaChatSession} from "node-llama-cpp";
import {fileURLToPath} from "url";
import path from "path";
import {PromptDebugger} from "../helper/prompt-debugger.js";
import { retryWithBackoff } from "./retry-util.js";
```

- `defineChatSessionFunction` — used to expose functions/tools to the LLM.
- `PromptDebugger` — helper to dump and inspect the prompt/context sent to the model.
- `retryWithBackoff` — the retry helper that centralizes retry/backoff logic.

We also compute `__dirname` and set a `debug` flag for the low-level library.

## 2. Initialize the model and context

The code loads the `llama` runtime, then loads a model from `models/` and creates a `context` with a fixed context size (in tokens).

Key lines:

```javascript
const llama = await getLlama({debug});
const model = await llama.loadModel({ modelPath: path.join(__dirname, "../", "models", "hf_Qwen_Qwen3-1.7B.Q8_0.gguf") });
const context = await model.createContext({contextSize: 2000});
```

Notes:

- Model and context creation are asynchronous and may take time — in production you'd add lifecycle management around them.

## 3. System prompt

The system prompt instructs the agent to act as a chronologist and standardize time formats. This is the same behavior as the simple-agent example, but it is worth repeating because tool outputs are raw and the LLM is expected to perform the conversion.

```javascript
const systemPrompt = `You are a professional chronologist who standardizes time representations across different systems.

Always convert times from 12-hour format (e.g., "1:46:36 PM") to 24-hour format (e.g., "13:46") without seconds
before returning them.`;
```

## 4. Create a chat session

```javascript
const session = new LlamaChatSession({ contextSequence: context.getSequence(), systemPrompt });
```

The session holds the conversation state and provides the `prompt()` convenience method used later.

## 5. Define the `getCurrentTime` tool with retries

This tool illustrates how to wrap a flaky operation inside `retryWithBackoff`.

Important parts of the handler:

- The inner function (passed to `retryWithBackoff`) simulates an unreliable external API by randomly throwing an error ~50% of the time.
- On success it returns `new Date().toLocaleTimeString()` (a human-friendly string the LLM will convert).
- The `retryWithBackoff` call is configured with `retries`, `baseDelay`, `label`, and `simulateFails` (a deterministic testing hook).

Why wrap the tool itself?

- Tool implementations are the natural place to handle transient infrastructure problems. The LLM only needs a stable tool contract and doesn't have to reason about retries.
- This keeps the prompt and system design simple and robust.

Excerpt:

```javascript
const getCurrentTime = defineChatSessionFunction({
    description: "Get the current time",
    params: { type: "object", properties: {} },
    async handler() {
        return retryWithBackoff(async () => {
            if (Math.random() < 0.5) throw new Error("Random API timeout");
            return new Date().toLocaleTimeString();
        }, {
            retries: 2,
            baseDelay: 300,
            label: "getCurrentTime tool function",
            simulateFails: 1
        });
    }
});
```

Notes on options used here:

- `retries: 2` → up to 3 total attempts.
- `baseDelay: 300` ms and the default exponential factor (2) create a growing wait between attempts.
- `simulateFails: 1` is only a deterministic testing/demo hook and should be disabled in production.

## 6. Register functions and prompt the model (with retry)

We register the function with the session and then call `session.prompt(prompt, {functions})` — however, the example wraps even the full prompt call with `retryWithBackoff`. This shows using the helper not only for tool implementations but also for higher-level interactions that might intermittently fail.

```javascript
const functions = {getCurrentTime};
const prompt = `What time is it right now?`;

const a1 = await retryWithBackoff(() => session.prompt(prompt, {functions}), {
    retries: 3,
    baseDelay: 400,
    factor: 2,
    jitter: true,
    label: "LLM session.prompt",
    simulateFails: 2,
    retryOn: (err) => true
});

console.log("AI: " + a1);
```

Why wrap the `session.prompt` call?

- Some upstream runtimes, libraries, or I/O layers may be flaky. Wrapping the high-level call gives a safety net for transient RPC failures during model inference or I/O that aren't specific to a single tool.

Important: use this sparingly, and only for idempotent interactions; avoid retrying operations that would have side effects without idempotency guarantees.

## 7. Prompt debugging and context inspection

After the conversation, the code uses `PromptDebugger` to write the prompt and context to `./logs/qwen_prompts.txt` for offline inspection. This helps verify whether the model saw the function schema, system prompt, and the tool result.

```javascript
const promptDebugger = new PromptDebugger({ outputDir: './logs', filename: 'qwen_prompts.txt', includeTimestamp: true, appendMode: false });
await promptDebugger.debugContextState({session, model});
```

## 8. Cleanup

Always dispose of session/context/model/runtime objects to free native resources:

```javascript
session.dispose();
context.dispose();
model.dispose();
llama.dispose();
```

## Key concepts demonstrated

- Centralized retry policy: `retryWithBackoff` provides consistent semantics across the codebase.
- Tool contracts vs. implementation: the LLM sees a simple `getCurrentTime` description; the implementation deals with reliability.
- Jitter and exponential backoff: they help avoid thundering herds and improve stability in distributed environments.
- Testing hooks: `simulateFails` helps write deterministic tests without complex network stubbing.

## Best practices and tips

- Only retry idempotent or read-only operations by default. For mutating operations, add idempotency keys or manual guards.
- Use `retryOn` to narrow retries to transient error classes (timeouts, 5xx responses) and avoid retrying 4xx client errors.
- Tune `retries`, `baseDelay`, and `factor` based on latency/SLAs. Keep an upper cap on backoff (the helper uses 20 s by default).
- Enable `jitter` in production to reduce synchronized retries.
- Log attempts and expose metrics (attempt count, retries, latency) for observability.

## Testing suggestions

- Unit tests:
  - Success path (fn resolves on the first try).
  - Transient failure path (fn throws N times, then resolves), asserting the number of attempts.
  - Permanent failure path (fn always throws), asserting the final error is propagated.
  - Use `simulateFails` for deterministic tests instead of stubbing timers (see the sketch below).
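To make these suggestions concrete, here is a minimal test sketch using Node's built-in `node:test` runner (Node 18+). It exercises `retry-util.js` via the `simulateFails` hook; the file name `retry-util.test.js` and the exact test phrasing are illustrative, not part of the current repo.

```javascript
// retry-util.test.js: illustrative sketch, assumes Node 18+ (built-in test runner)
import test from "node:test";
import assert from "node:assert/strict";
import { retryWithBackoff } from "./retry-util.js";

test("transient failures: two simulated failures, then success", async () => {
    let calls = 0;
    const result = await retryWithBackoff(async () => {
        calls++; // only reached once the simulated failures are exhausted
        return "ok";
    }, { retries: 3, baseDelay: 1, verbose: false, simulateFails: 2 });

    assert.equal(result, "ok");
    // simulateFails throws inside the helper *before* fn is invoked,
    // so fn itself runs exactly once here.
    assert.equal(calls, 1);
});

test("permanent failure: the last error propagates after retries are exhausted", async () => {
    await assert.rejects(
        retryWithBackoff(() => { throw new Error("boom"); },
            { retries: 2, baseDelay: 1, verbose: false }),
        { message: "boom" }
    );
});
```

Note that the helper logs the final failure via `console.error` even with `verbose: false`, so expect one log line from the permanent-failure case.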
## Example expected output

When successful, the LLM should return a normalized time like:

```
AI: 13:46
```

This shows the tool returned a human-readable time (e.g., `"1:46:36 PM"`) and the LLM converted it to `"13:46"` per the system prompt.

## Next steps you might want

- Add small unit tests under a `test/` folder using a test runner (Jest/Mocha) to exercise `retry-util.js`.
- Replace the `Math.random()` simulation with a pluggable provider in production.
- Add telemetry (Prometheus counters or simple structured logs) to record retry statistics.
diff --git a/simple-agent-with-retry/CONCEPT.md b/simple-agent-with-retry/CONCEPT.md
new file mode 100644
index 0000000..039a99a
--- /dev/null
+++ b/simple-agent-with-retry/CONCEPT.md
@@ -0,0 +1,57 @@
# Concept: simple-agent-with-retry

## Overview

`simple-agent-with-retry` is a small demonstration agent that combines LLM function-calling with a robust retry wrapper for unreliable tools or external APIs. The agent shows how a model can request an external piece of information (here: the current time) via a defined function, and how that function can tolerate transient failures using an exponential backoff strategy.

The goal is to illustrate practical agent design patterns: clear tool contracts, safe retries, and keeping the LLM focused on conversion/formatting rather than dealing with transient transport errors.

## Core idea (short)

User asks: "What time is it?"

- The LLM decides it needs the current time → calls `getCurrentTime()`
- The `getCurrentTime` implementation calls an external/local time provider wrapped with `retryWithBackoff`
- On transient failure, the wrapper retries; on success it returns a time string (e.g., "1:46:36 PM")
- The LLM converts that to the required normalized form (24-hour, no seconds) and returns it to the user

## Contract: `getCurrentTime` (tool exposed to the LLM)

- Input: none (the LLM calls the function without parameters)
- Output: string — a human-readable time (example: `"1:46:36 PM"`)
- Error modes: may throw/reject for persistent failures (network, platform); transient failures should be retried by the wrapper
- Success criteria: returns a valid time string recognizable by the LLM so it can perform format conversion

## How retry fits into the agent flow

1. The LLM emits a function-call intent for `getCurrentTime`.
2. The agent's function handler invokes the time provider wrapped with `retryWithBackoff`.
3. The wrapper attempts the call and, on transient errors, retries per policy (exponential backoff, optional jitter).
4. If retries succeed, the handler returns the time to the LLM; if retries are exhausted, the handler throws and the LLM must handle the error case (e.g., say it couldn't fetch the time).

## Key options demonstrated in the agent

- `retries`: how many retry attempts after the initial try (tunable based on expected reliability)
- `baseDelay` and `factor`: control the backoff curve
- `jitter`: avoid synchronized retry storms when many clients retry together
- `retryOn`: allow selective retrying (e.g., only for network timeouts, not for invalid responses)

## Edge cases and design notes

- Persistent failure: if the external service is down, retries will eventually exhaust; ensure the LLM is given a clear fallback or graceful message.
- Non-idempotent operations: only use retries for idempotent or read-only actions like time lookup; for state-changing operations, design idempotency tokens or avoid blind retries.
- Observability: log attempts, delays, and errors so you can debug retry behavior in production.
- Testing hooks: the sample includes a `simulateFails` option for deterministic tests.

## Example flow (concise)

- User: "What time is it?"
- LLM: calls `getCurrentTime()`
- Handler: calls the time provider via `retryWithBackoff` → receives `"1:46:36 PM"`
- LLM: converts to `"13:46"` and replies to the user

## Why this pattern matters

This pattern separates responsibilities: the agent exposes a simple tool contract to the LLM, the tool implementation handles reliability concerns (retries/backoff), and the LLM focuses on reasoning and formatting. This keeps prompts simple and reduces brittle prompt-engineering workarounds for transient infrastructure issues.

## Next steps / improvements

- Add telemetry counters (attempts, failures, average delay)
- Expose a small config file for per-environment retry-policy tuning
- Add unit tests covering success, transient failures, and permanent failures
diff --git a/simple-agent-with-retry/RETRY-UTIL.CONCEPT.md b/simple-agent-with-retry/RETRY-UTIL.CONCEPT.md
new file mode 100644
index 0000000..10baee4
--- /dev/null
+++ b/simple-agent-with-retry/RETRY-UTIL.CONCEPT.md
@@ -0,0 +1,74 @@
# Retry Utility Concept

## Overview

`retryWithBackoff` is a small, reusable helper that runs an operation (sync or async) and, on failure, retries it according to an exponential backoff policy with optional jitter. It's designed for transient, idempotent operations such as network requests, local RPCs, or small platform calls where occasional timeouts or flakiness are expected.

This concept document explains the intent, options, behavior, and recommended usage patterns for the helper included in this project.

## Purpose

- Improve robustness by automatically retrying transient failures.
- Provide a single, well-tested place to tune retry behavior (delays, caps, jitter).
- Keep calling code (e.g., tool handlers) simple by moving retry complexity into the utility.

## Function contract (high level)

Function: `retryWithBackoff(fn, options?)`

- `fn`: a callable that returns a value or a Promise.
- Returns: the return value of `fn` when successful.
- Throws: rethrows the last error if retries are exhausted or if `retryOn` returns `false` for a given error.

## Important options

- `retries` (number): how many additional attempts after the first call (default 3 → up to 4 total attempts).
- `baseDelay` (ms): initial delay before the first retry.
- `factor` (number): exponential multiplier for delay growth.
- `jitter` (boolean): when true, multiply the delay by a random factor (e.g., 0.8–1.2) to avoid synchronized retries.
- `label` (string): human-friendly name used in logs.
- `verbose` (boolean): when true, log attempts and delays for visibility.
- `simulateFails` (number): testing hook to force simulated failures for the first N attempts.
- `retryOn` (err => boolean): predicate to decide whether an error should be retried.

## Behavior details

- Total attempts = 1 + `retries`.
- The delay before retry N is `min(baseDelay * factor^(N-1), cap)`; the cap prevents excessively long waits.
- If `jitter` is enabled, the computed delay is multiplied by a small random factor to reduce thundering-herd effects.
- If `retryOn(err)` returns `false`, the helper will immediately rethrow the error (no more retries).
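To make the delay formula concrete, this small sketch (illustrative only, not part of the utility) prints the backoff schedule produced by the formula above with `baseDelay = 300`, `factor = 2`, and the 20-second cap:

```javascript
// Illustrative: compute the capped exponential backoff schedule
const baseDelay = 300;  // ms
const factor = 2;
const cap = 20_000;     // matches the helper's 20 s cap

for (let attempt = 1; attempt <= 8; attempt++) {
    const delay = Math.min(baseDelay * Math.pow(factor, attempt - 1), cap);
    console.log(`retry ${attempt}: wait ${delay} ms`);
}
// → 300, 600, 1200, 2400, 4800, 9600, 19200, 20000 (capped)
```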
## Edge cases and guidance

- Non-idempotent operations: avoid automatic retries, or implement idempotency tokens before retrying.
- Long-running operations: ensure the retry policy accounts for operation duration (timeouts plus retries may exceed user expectations).
- Error classification: use `retryOn` to filter retriable errors (e.g., retry on network timeouts, not on 4xx client validation errors).
- Backoff cap: pick a sensible upper bound (e.g., 20 s) so a failed call doesn't block for too long.

## Example usage (conceptual)

- Wrap a flaky time-fetching helper used by an LLM tool: `await retryWithBackoff(() => fetchTime(), { retries: 2, jitter: true })`.
- Use `retryOn` to retry only on `err.code === 'ETIMEDOUT'` or similar patterns.

## Testing tips

- Unit-test the success path (fn resolves on the first try).
- Unit-test transient failure (fn throws the first N times, then resolves) and assert the expected number of attempts.
- Unit-test permanent failure (fn always throws) and ensure the final error is thrown after the expected attempts.
- Use the `simulateFails` option for deterministic tests rather than stubbing timers when possible.

## Observability and metrics

- Expose counters for attempts, retries, and failures.
- Record a histogram of retry delays and of latency for successful attempts.
- Include the `label` in logs so you can correlate retries with the calling operation.

## When not to use retries

- For operations that mutate shared state without idempotency.
- When strict single-attempt semantics are required.

## Key takeaways

- `retryWithBackoff` centralizes retry policy and makes tool handlers simpler and more robust.
- Configure `retryOn`, `retries`, and `jitter` deliberately — defaults are convenient but may need tuning for production.
diff --git a/simple-agent-with-retry/retry-util.js b/simple-agent-with-retry/retry-util.js
new file mode 100644
index 0000000..f3d8b8c
--- /dev/null
+++ b/simple-agent-with-retry/retry-util.js
@@ -0,0 +1,122 @@
/**
 * Retry an asynchronous (or synchronous) operation with exponential backoff and optional jitter.
 *
 * This wrapper calls the provided function `fn` and, on failure, retries it according to
 * the supplied options. The function increments an internal attempt counter before each call,
 * so the `retries` option specifies how many retry attempts will be made after the initial call
 * (i.e. total attempts = 1 + retries). The delay between attempts grows exponentially by `factor`
 * and is capped at 20_000 ms. When `jitter` is enabled, the computed delay is multiplied by a
 * random factor in the range [0.8, 1.2].
 *
 * @template T
 * @param {() => T | Promise<T>} fn
 * The operation to run. May be synchronous or return a promise; its result is returned on success.
 *
 * @param {object} [options]
 * Retry policy options.
 *
 * @param {number} [options.retries=3]
 * Number of retry attempts after the initial call (total attempts = 1 + retries).
 *
 * @param {number} [options.baseDelay=300]
 * Base delay in milliseconds used for the backoff calculation. The delay for attempt N (1-based)
 * is calculated as baseDelay * factor^(N-1) (before applying jitter and capping).
 *
 * @param {number} [options.factor=2]
 * Exponential multiplier applied to the delay each attempt. A factor of 2 doubles the delay
 * each retry.
 *
 * @param {boolean} [options.jitter=false]
 * When true, applies random jitter to the delay. The jitter multiplies the calculated delay by
 * a random factor between 0.8 and 1.2 to help reduce synchronization storms.
 *
 * @param {string} [options.label="operation"]
 * Human-friendly label used in verbose logging to identify the operation being retried.
 *
 * @param {boolean} [options.verbose=true]
 * When true, the wrapper logs progress and decisions (attempts, delays, errors, success).
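 *
 * @example
 * // Illustrative usage; fetchRemoteValue is a hypothetical flaky async call.
 * const value = await retryWithBackoff(() => fetchRemoteValue(), {
 *     retries: 2, baseDelay: 300, jitter: true, label: "fetchRemoteValue"
 * });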
+ * + * @param {number} [options.simulateFails=0] + * Testing/demo option: intentionally throw a simulated error for the first `simulateFails` + * attempts. This allows testing the retry logic without needing a real failure. + * + * @param {(err: Error) => boolean} [options.retryOn=() => true] + * Predicate function invoked with the thrown/rejected error. Return true to retry the operation, + * or false to stop retrying and immediately rethrow the error. Allows selective retrying for + * transient errors only. + * + */ +export async function retryWithBackoff( + fn, + { + retries = 3, + baseDelay = 300, + factor = 2, + jitter = false, + label = "operation", + verbose = true, + simulateFails = 0, + retryOn = () => true // fn: (err) => boolean + } = {} +) { + let attempt = 0; + let forcedFailCount = 0; + + if (verbose) { + console.log(`\n--- RETRY WRAPPER START: ${label} ---`); + console.log(`Max retries: ${retries}`); + console.log(`Base delay: ${baseDelay}ms`); + console.log(`Strategy: exponential x${factor}${jitter ? " + jitter" : ""}\n`); + } + + while (attempt <= retries) { + try { + attempt++; + + if (verbose) console.log(`[Attempt ${attempt}] Calling ${label}...`); + + //SIMULATED FAILURE (for demo) + if (forcedFailCount < simulateFails) { + forcedFailCount++; + throw new Error(`Simulated failure ${forcedFailCount}/${simulateFails}`); + } + + const result = await fn(); // actual call + + if (verbose) { + console.log(`Success on attempt ${attempt}`); + console.log(`--- END RETRY REPORT (${label}) ---\n`); + } + return result; + + } catch (err) { + + if (!retryOn(err)) { + console.error(`Non-retryable error: ${err.message}`); + throw err; + } + + if (attempt > retries) { + console.error(`Failed after ${retries} retries:`, err.message); + console.log(`--- END RETRY REPORT (${label}) ---\n`); + throw err; + } + + // Calculate next delay + const delay = Math.min( + baseDelay * Math.pow(factor, attempt - 1), + 20000 // cap to 20 sec max wait + ); + const jittered = jitter// jitter the delay to avoid thundering herd effect + ? delay * (0.8 + Math.random() * 0.4) + : delay; + + if (verbose) {// if verbose is switched on we log the error and the retry info, for more visibility + console.log(`Error: ${err.message}`); + console.log(`Retrying in ${Math.round(jittered)}ms...\n`); + } + + await new Promise(res => setTimeout(res, jittered)); + } + } +} + +//thundering herd effect: A situation where multiple systems or processes +//attempt to perform the same action simultaneously, often leading to +//resource contention, performance degradation, or system overload. +//This often occurs in distributed systems, caching mechanisms, or +//networked applications when many clients try to access a shared resource +//at the same time after a cache expiration or failure. It is relevant here +//because without jitter, multiple retries could align in time, causing +//spikes in load. 
\ No newline at end of file
diff --git a/simple-agent-with-retry/simple-agent-with-retry.js b/simple-agent-with-retry/simple-agent-with-retry.js
new file mode 100644
index 0000000..eaf5db9
--- /dev/null
+++ b/simple-agent-with-retry/simple-agent-with-retry.js
@@ -0,0 +1,105 @@
import {defineChatSessionFunction, getLlama, LlamaChatSession} from "node-llama-cpp";
import {fileURLToPath} from "url";
import path from "path";
import {PromptDebugger} from "../helper/prompt-debugger.js";
import { retryWithBackoff } from "./retry-util.js";

const __dirname = path.dirname(fileURLToPath(import.meta.url));
const debug = false;

const llama = await getLlama({debug});
const model = await llama.loadModel({
    modelPath: path.join(
        __dirname,
        "../",
        "models",
        "hf_Qwen_Qwen3-1.7B.Q8_0.gguf"
    )
});
const context = await model.createContext({contextSize: 2000});

const systemPrompt = `You are a professional chronologist who standardizes time representations across different systems.

Always convert times from 12-hour format (e.g., "1:46:36 PM") to 24-hour format (e.g., "13:46") without seconds
before returning them.`;


const session = new LlamaChatSession({
    contextSequence: context.getSequence(),
    systemPrompt,
});

const getCurrentTime = defineChatSessionFunction({
    description: "Get the current time",
    params: {
        type: "object",
        properties: {}
    },
    // The handler invoked when the LLM calls the getCurrentTime function.
    // It returns a promise and uses retryWithBackoff to tolerate transient failures.
    async handler() {
        // Wrap the tool logic in retryWithBackoff so transient errors are retried
        // according to the backoff policy defined in the options object below.
        return retryWithBackoff(
            async () => {
                // Simulate an unreliable external tool or API call:
                // - Approximately 50% of the time we throw an error to emulate a timeout/failure.
                // - When successful, we return a human-readable time string.
                //
                // Note: toLocaleTimeString often includes seconds and AM/PM; the system prompt
                // expects the model to convert to 24-hour format without seconds after receiving this.
                if (Math.random() < 0.5) {
                    // Throwing signals a transient failure; retryWithBackoff will catch this and retry.
                    throw new Error("Random API timeout");
                }

                // On success, return the current local time string.
                return new Date().toLocaleTimeString();
            },
            {
                // How many times to retry after the initial attempt (2 retries → up to 3 attempts).
                retries: 2,
                // Base delay (ms) before the first retry; actual delays typically grow per attempt.
                baseDelay: 300,
                // Label useful for logging/tracing which operation is being retried.
                label: "getCurrentTime tool function",
                // Optional testing hook: simulateFails causes the retry helper itself to
                // pretend a specified number of failures before allowing success. Useful for testing.
                simulateFails: 1
            }
        );
    }
});

const functions = {getCurrentTime};
const prompt = `What time is it right now?`;

// Execute the prompt.
// Example 2: wrapping the whole session.prompt call with retry logic to
// simulate transient failures in API calls.
const a1 = await retryWithBackoff(() => session.prompt(prompt, {functions}), {
    retries: 3,
    baseDelay: 400,
    factor: 2,
    jitter: true,
    label: "LLM session.prompt",
    simulateFails: 2,
    retryOn: (err) => true // retry on any error (demo); narrow this in real code
});
console.log("AI: " + a1);

// Debug after the prompt execution
const promptDebugger = new PromptDebugger({
    outputDir: './logs',
    filename: 'qwen_prompts.txt',
    includeTimestamp: true, // adds timestamp to filename
    appendMode: false // overwrites file each time
});
await promptDebugger.debugContextState({session, model});

// Clean up
session.dispose();
context.dispose();
model.dispose();
llama.dispose();
\ No newline at end of file