Commit 6d0497b

refactor: move llama.cpp config handling to Rust
- Removed duplicated TypeScript type definitions for `LlamacppConfig`, `ModelPlan`, `DownloadItem`, `ModelConfig`, etc.
- Added a new `src/guest-js/types.ts` that exports the consolidated types and a helper `normalizeLlamacppConfig` for converting raw config objects.
- Implemented a dedicated Rust module `args.rs` that builds all command-line arguments for llama.cpp from a `LlamacppConfig` struct, handling embedding, flash-attention, GPU/CPU flags, and other options.
- Updated `commands.rs` to construct arguments via `ArgumentBuilder`, validate paths, and log the generated args.
- Added more explicit error handling for invalid configuration arguments and updated the error enum to include `InvalidArgument`.
- Exported the new `cleanupLlamaProcesses` command and updated the guest-JS API accordingly.
- Adjusted the TypeScript `loadLlamaModel` helper to use the new config normalization and argument shape.
- Improved logging and documentation for clarity.
1 parent 4957509 · commit 6d0497b
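To make the reshaped guest-JS surface concrete, here is a minimal, hypothetical usage sketch of the consolidated types and the new `loadLlamaModel` argument shape. The parameter order (backend path, model id, model path, port, config, envs, mmproj path, embedding flag, timeout) mirrors the updated call site in the diff below; everything else — that `normalizeLlamacppConfig` is re-exported from `@janhq/tauri-plugin-llamacpp-api`, the path resolution, and all literal values — is an assumption for illustration, not taken from the commit.

```ts
// Hypothetical usage sketch (not part of this commit).
// Assumes normalizeLlamacppConfig and the types are re-exported by the plugin package.
import {
  loadLlamaModel,
  normalizeLlamacppConfig,
  type LlamacppConfig,
  type ModelConfig,
} from '@janhq/tauri-plugin-llamacpp-api'

async function loadExample(
  backendPath: string,
  janDataFolderPath: string,
  modelId: string,
  modelConfig: ModelConfig,
  rawSettings: Record<string, unknown>
) {
  // Assumed behavior: fill defaults and coerce raw setting values
  // into a well-typed LlamacppConfig.
  const cfg: LlamacppConfig = normalizeLlamacppConfig(rawSettings)

  const envs: Record<string, string> = {
    LLAMA_API_KEY: 'placeholder-key', // generated per session in the extension
  }

  // New argument shape (see the updated call site in the diff below):
  // the TypeScript side passes the structured config, and the Rust
  // ArgumentBuilder turns it into llama.cpp CLI flags.
  return await loadLlamaModel(
    backendPath,
    modelId,
    `${janDataFolderPath}/${modelConfig.model_path}`, // resolved model path (illustrative)
    8080, // port (placeholder)
    cfg,
    envs,
    undefined, // mmprojPath, only set for multimodal models
    false, // isEmbedding
    600 // timeout
  )
}
```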

File tree

7 files changed (+680, -236 lines)


extensions/llamacpp-extension/src/index.ts

Lines changed: 27 additions & 192 deletions
@@ -44,106 +44,19 @@ import {
   isModelSupported,
   planModelLoadInternal,
   unloadLlamaModel,
+  LlamacppConfig,
+  ModelPlan,
+  DownloadItem,
+  ModelConfig,
+  EmbeddingResponse,
+  DeviceList,
+  SystemMemory,
 } from '@janhq/tauri-plugin-llamacpp-api'
 import { getSystemUsage, getSystemInfo } from '@janhq/tauri-plugin-hardware-api'

 // Error message constant - matches web-app/src/utils/error.ts
 const OUT_OF_CONTEXT_SIZE = 'the request exceeds the available context size.'

-type LlamacppConfig = {
-  version_backend: string
-  auto_update_engine: boolean
-  auto_unload: boolean
-  timeout: number
-  llamacpp_env: string
-  memory_util: string
-  chat_template: string
-  n_gpu_layers: number
-  offload_mmproj: boolean
-  cpu_moe: boolean
-  n_cpu_moe: number
-  override_tensor_buffer_t: string
-  ctx_size: number
-  threads: number
-  threads_batch: number
-  n_predict: number
-  batch_size: number
-  ubatch_size: number
-  device: string
-  split_mode: string
-  main_gpu: number
-  flash_attn: string
-  cont_batching: boolean
-  no_mmap: boolean
-  mlock: boolean
-  no_kv_offload: boolean
-  cache_type_k: string
-  cache_type_v: string
-  defrag_thold: number
-  rope_scaling: string
-  rope_scale: number
-  rope_freq_base: number
-  rope_freq_scale: number
-  ctx_shift: boolean
-}
-
-type ModelPlan = {
-  gpuLayers: number
-  maxContextLength: number
-  noOffloadKVCache: boolean
-  offloadMmproj?: boolean
-  batchSize: number
-  mode: 'GPU' | 'Hybrid' | 'CPU' | 'Unsupported'
-}
-
-interface DownloadItem {
-  url: string
-  save_path: string
-  proxy?: Record<string, string | string[] | boolean>
-  sha256?: string
-  size?: number
-}
-
-interface ModelConfig {
-  model_path: string
-  mmproj_path?: string
-  name: string // user-friendly
-  // some model info that we cache upon import
-  size_bytes: number
-  sha256?: string
-  mmproj_sha256?: string
-  mmproj_size_bytes?: number
-}
-
-interface EmbeddingResponse {
-  model: string
-  object: string
-  usage: {
-    prompt_tokens: number
-    total_tokens: number
-  }
-  data: EmbeddingData[]
-}
-
-interface EmbeddingData {
-  embedding: number[]
-  index: number
-  object: string
-}
-
-interface DeviceList {
-  id: string
-  name: string
-  mem: number
-  free: number
-}
-
-interface SystemMemory {
-  totalVRAM: number
-  totalRAM: number
-  totalMemory: number
-}
-
 /**
  * Override the default app.log function to use Jan's logging system.
  * @param args
@@ -841,7 +754,7 @@ export default class llamacpp_extension extends AIEngine {
       return { updateNeeded: false, newVersion: '0' }
     }

-    const [latestVersion, latestBackend] = targetBackendString.split('/')
+    const [latestVersion] = targetBackendString.split('/')

     // Check if update is needed (version comparison)
     if (
@@ -1671,7 +1584,7 @@ export default class llamacpp_extension extends AIEngine {
          return await this.findSessionByModel(modelId)
        } catch (e) {
          logger.warn(`Unable to find session for model "${modelId}": ${e}`)
-          return null // treat as “not‑eligible for unload”
+          return null
        }
      })
    )
@@ -1689,10 +1602,11 @@ export default class llamacpp_extension extends AIEngine {
        }
      }
    }
-    const args: string[] = []
+
     const envs: Record<string, string> = {}
     const cfg = { ...this.config, ...(overrideSettings ?? {}) }
     const [version, backend] = cfg.version_backend.split('/')
+
     if (!version || !backend) {
       throw new Error(
         'Initial setup for the backend failed due to a network issue. Please restart the app!'
@@ -1714,120 +1628,41 @@ export default class llamacpp_extension extends AIEngine {
     })
     const port = await this.getRandomPort()

-    // disable llama-server webui
-    // TODO: Determine what's the best course of action here.
-    // Hopefully, we would want all the fork to have same set of arguments
-    // Otherwise it would become impossible to maintain
-    // Keeping this for now
-    if (!backend.startsWith('ik')) args.push('--no-webui')
+    // Generate API key
     const api_key = await this.generateApiKey(modelId, String(port))
     envs['LLAMA_API_KEY'] = api_key
     envs['LLAMA_ARG_TIMEOUT'] = String(this.timeout)

-    // set user envs
+    // Set user envs
     if (this.llamacpp_env) this.parseEnvFromString(envs, this.llamacpp_env)

-    // model option is required
-    // NOTE: model_path and mmproj_path can be either relative to Jan's data folder or absolute path
+    // Resolve model path
     const modelPath = await joinPath([
       janDataFolderPath,
       modelConfig.model_path,
     ])
-    args.push('--jinja')
-    args.push('-m', modelPath)
-    if (cfg.cpu_moe) args.push('--cpu-moe')
-    if (cfg.n_cpu_moe && cfg.n_cpu_moe > 0) {
-      args.push('--n-cpu-moe', String(cfg.n_cpu_moe))
-    }
-    // For overriding tensor buffer type, useful where
-    // massive MOE models can be made faster by keeping attention on the GPU
-    // and offloading the expert FFNs to the CPU.
-    // This is an expert level settings and should only be used by people
-    // who knows what they are doing.
-    // Takes a regex with matching tensor name as input
-    if (cfg.override_tensor_buffer_t)
-      args.push('--override-tensor', cfg.override_tensor_buffer_t)
-    // offload multimodal projector model to the GPU by default. if there is not enough memory
-    // turn this setting off will keep the projector model on the CPU but the image processing can
-    // take longer
-    if (cfg.offload_mmproj === false) args.push('--no-mmproj-offload')
-    args.push('-a', modelId)
-    args.push('--port', String(port))
-    if (modelConfig.mmproj_path) {
-      const mmprojPath = await joinPath([
-        janDataFolderPath,
-        modelConfig.mmproj_path,
-      ])
-      args.push('--mmproj', mmprojPath)
-    }
-    // Add remaining options from the interface
-    if (cfg.chat_template) args.push('--chat-template', cfg.chat_template)
-    const gpu_layers =
-      parseInt(String(cfg.n_gpu_layers)) >= 0 ? cfg.n_gpu_layers : 100
-    args.push('-ngl', String(gpu_layers))
-    if (cfg.threads > 0) args.push('--threads', String(cfg.threads))
-    if (cfg.threads_batch > 0)
-      args.push('--threads-batch', String(cfg.threads_batch))
-    if (cfg.batch_size > 0) args.push('--batch-size', String(cfg.batch_size))
-    if (cfg.ubatch_size > 0) args.push('--ubatch-size', String(cfg.ubatch_size))
-    if (cfg.device.length > 0) args.push('--device', cfg.device)
-    if (cfg.split_mode.length > 0 && cfg.split_mode != 'layer')
-      args.push('--split-mode', cfg.split_mode)
-    if (cfg.main_gpu !== undefined && cfg.main_gpu !== 0)
-      args.push('--main-gpu', String(cfg.main_gpu))
-    // Note: Older llama.cpp versions are no longer supported
-    if (
-      cfg.flash_attn !== undefined ||
-      (cfg.flash_attn !== 'auto' && // set argument only when the setting value is not auto
-        !backend.startsWith('ik')) // ik fork of llama.cpp doesn't support --flash-attn
-    ) {
-      args.push('--flash-attn', String(cfg.flash_attn)) // default: auto = ON when supported
-    } else if (backend.startsWith('ik') && cfg.flash_attn == 'on') {
-      args.push('-fa') // hoping the ik fork is still using the old fa arguments
-    }
-
-    // Boolean flags
-    if (cfg.ctx_shift) args.push('--context-shift')
-    if (cfg.cont_batching) args.push('--cont-batching')
-    if (cfg.no_mmap) args.push('--no-mmap')
-    if (cfg.mlock) args.push('--mlock')
-    if (cfg.no_kv_offload) args.push('--no-kv-offload')
-    if (isEmbedding) {
-      args.push('--embedding')
-      args.push('--pooling', 'mean')
-    } else {
-      if (cfg.ctx_size > 0) args.push('--ctx-size', String(cfg.ctx_size))
-      if (cfg.n_predict > 0) args.push('--n-predict', String(cfg.n_predict))
-      if (cfg.cache_type_k && cfg.cache_type_k != 'f16')
-        args.push('--cache-type-k', cfg.cache_type_k)
-      if (
-        cfg.flash_attn !== 'on' &&
-        cfg.cache_type_v != 'f16' &&
-        cfg.cache_type_v != 'f32'
-      ) {
-        args.push('--cache-type-v', cfg.cache_type_v)
-      }
-      if (cfg.defrag_thold && cfg.defrag_thold != 0.1)
-        args.push('--defrag-thold', String(cfg.defrag_thold))

-      if (cfg.rope_scaling && cfg.rope_scaling != 'none')
-        args.push('--rope-scaling', cfg.rope_scaling)
-      if (cfg.rope_scale && cfg.rope_scale != 1)
-        args.push('--rope-scale', String(cfg.rope_scale))
-      if (cfg.rope_freq_base && cfg.rope_freq_base != 0)
-        args.push('--rope-freq-base', String(cfg.rope_freq_base))
-      if (cfg.rope_freq_scale && cfg.rope_freq_scale != 1)
-        args.push('--rope-freq-scale', String(cfg.rope_freq_scale))
+    // Resolve mmproj path if present
+    let mmprojPath: string | undefined = undefined
+    if (modelConfig.mmproj_path) {
+      mmprojPath = await joinPath([janDataFolderPath, modelConfig.mmproj_path])
     }

-    logger.info('Calling Tauri command llama_load with args:', args)
+    logger.info(
+      'Calling Tauri command load_llama_model with config:',
+      JSON.stringify(cfg)
+    )
     const backendPath = await getBackendExePath(backend, version)

     try {
       const sInfo = await loadLlamaModel(
         backendPath,
-        args,
+        modelId,
+        modelPath,
+        port,
+        cfg,
         envs,
+        mmprojPath,
         isEmbedding,
         Number(this.timeout)
       )
