@@ -44,106 +44,19 @@ import {
   isModelSupported,
   planModelLoadInternal,
   unloadLlamaModel,
+  LlamacppConfig,
+  ModelPlan,
+  DownloadItem,
+  ModelConfig,
+  EmbeddingResponse,
+  DeviceList,
+  SystemMemory,
 } from '@janhq/tauri-plugin-llamacpp-api'
 import { getSystemUsage, getSystemInfo } from '@janhq/tauri-plugin-hardware-api'
 
 // Error message constant - matches web-app/src/utils/error.ts
 const OUT_OF_CONTEXT_SIZE = 'the request exceeds the available context size.'
 
-type LlamacppConfig = {
-  version_backend: string
-  auto_update_engine: boolean
-  auto_unload: boolean
-  timeout: number
-  llamacpp_env: string
-  memory_util: string
-  chat_template: string
-  n_gpu_layers: number
-  offload_mmproj: boolean
-  cpu_moe: boolean
-  n_cpu_moe: number
-  override_tensor_buffer_t: string
-  ctx_size: number
-  threads: number
-  threads_batch: number
-  n_predict: number
-  batch_size: number
-  ubatch_size: number
-  device: string
-  split_mode: string
-  main_gpu: number
-  flash_attn: string
-  cont_batching: boolean
-  no_mmap: boolean
-  mlock: boolean
-  no_kv_offload: boolean
-  cache_type_k: string
-  cache_type_v: string
-  defrag_thold: number
-  rope_scaling: string
-  rope_scale: number
-  rope_freq_base: number
-  rope_freq_scale: number
-  ctx_shift: boolean
-}
-
-type ModelPlan = {
-  gpuLayers: number
-  maxContextLength: number
-  noOffloadKVCache: boolean
-  offloadMmproj?: boolean
-  batchSize: number
-  mode: 'GPU' | 'Hybrid' | 'CPU' | 'Unsupported'
-}
-
-interface DownloadItem {
-  url: string
-  save_path: string
-  proxy?: Record<string, string | string[] | boolean>
-  sha256?: string
-  size?: number
-}
-
-interface ModelConfig {
-  model_path: string
-  mmproj_path?: string
-  name: string // user-friendly
-  // some model info that we cache upon import
-  size_bytes: number
-  sha256?: string
-  mmproj_sha256?: string
-  mmproj_size_bytes?: number
-}
-
-interface EmbeddingResponse {
-  model: string
-  object: string
-  usage: {
-    prompt_tokens: number
-    total_tokens: number
-  }
-  data: EmbeddingData[]
-}
-
-interface EmbeddingData {
-  embedding: number[]
-  index: number
-  object: string
-}
-
-interface DeviceList {
-  id: string
-  name: string
-  mem: number
-  free: number
-}
-
-interface SystemMemory {
-  totalVRAM: number
-  totalRAM: number
-  totalMemory: number
-}
-
 /**
  * Override the default app.log function to use Jan's logging system.
  * @param args
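With the local declarations gone, these shapes are consumed from `@janhq/tauri-plugin-llamacpp-api` instead. A minimal sketch of how a consumer might reference the imported types, assuming the plugin's exported shapes match the definitions deleted above (the `planFitsConfig` helper is illustrative, not part of this change):

```ts
import type { LlamacppConfig, ModelPlan } from '@janhq/tauri-plugin-llamacpp-api'

// Illustrative helper: check whether a plan produced by the plugin can honor
// the user's configured context size without being reported as unsupported.
function planFitsConfig(plan: ModelPlan, cfg: LlamacppConfig): boolean {
  return plan.mode !== 'Unsupported' && plan.maxContextLength >= cfg.ctx_size
}
```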
@@ -841,7 +754,7 @@ export default class llamacpp_extension extends AIEngine {
       return { updateNeeded: false, newVersion: '0' }
     }
 
-    const [latestVersion, latestBackend] = targetBackendString.split('/')
+    const [latestVersion] = targetBackendString.split('/')
 
     // Check if update is needed (version comparison)
     if (
@@ -1671,7 +1584,7 @@ export default class llamacpp_extension extends AIEngine {
           return await this.findSessionByModel(modelId)
         } catch (e) {
           logger.warn(`Unable to find session for model "${modelId}": ${e}`)
-          return null // treat as “not‑eligible for unload”
+          return null
         }
       })
     )
@@ -1689,10 +1602,11 @@ export default class llamacpp_extension extends AIEngine {
         }
       }
     }
-    const args: string[] = []
+
     const envs: Record<string, string> = {}
     const cfg = { ...this.config, ...(overrideSettings ?? {}) }
     const [version, backend] = cfg.version_backend.split('/')
+
     if (!version || !backend) {
       throw new Error(
         'Initial setup for the backend failed due to a network issue. Please restart the app!'
@@ -1714,120 +1628,41 @@ export default class llamacpp_extension extends AIEngine {
     })
     const port = await this.getRandomPort()
 
-    // disable llama-server webui
-    // TODO: Determine what's the best course of action here.
-    // Hopefully, we would want all the fork to have same set of arguments
-    // Otherwise it would become impossible to maintain
-    // Keeping this for now
-    if (!backend.startsWith('ik')) args.push('--no-webui')
+    // Generate API key
     const api_key = await this.generateApiKey(modelId, String(port))
     envs['LLAMA_API_KEY'] = api_key
     envs['LLAMA_ARG_TIMEOUT'] = String(this.timeout)
 
-    // set user envs
+    // Set user envs
     if (this.llamacpp_env) this.parseEnvFromString(envs, this.llamacpp_env)
 
-    // model option is required
-    // NOTE: model_path and mmproj_path can be either relative to Jan's data folder or absolute path
+    // Resolve model path
     const modelPath = await joinPath([
       janDataFolderPath,
       modelConfig.model_path,
     ])
-    args.push('--jinja')
-    args.push('-m', modelPath)
-    if (cfg.cpu_moe) args.push('--cpu-moe')
-    if (cfg.n_cpu_moe && cfg.n_cpu_moe > 0) {
-      args.push('--n-cpu-moe', String(cfg.n_cpu_moe))
-    }
-    // For overriding tensor buffer type, useful where
-    // massive MOE models can be made faster by keeping attention on the GPU
-    // and offloading the expert FFNs to the CPU.
-    // This is an expert level settings and should only be used by people
-    // who knows what they are doing.
-    // Takes a regex with matching tensor name as input
-    if (cfg.override_tensor_buffer_t)
-      args.push('--override-tensor', cfg.override_tensor_buffer_t)
-    // offload multimodal projector model to the GPU by default. if there is not enough memory
-    // turn this setting off will keep the projector model on the CPU but the image processing can
-    // take longer
-    if (cfg.offload_mmproj === false) args.push('--no-mmproj-offload')
-    args.push('-a', modelId)
-    args.push('--port', String(port))
-    if (modelConfig.mmproj_path) {
-      const mmprojPath = await joinPath([
-        janDataFolderPath,
-        modelConfig.mmproj_path,
-      ])
-      args.push('--mmproj', mmprojPath)
-    }
-    // Add remaining options from the interface
-    if (cfg.chat_template) args.push('--chat-template', cfg.chat_template)
-    const gpu_layers =
-      parseInt(String(cfg.n_gpu_layers)) >= 0 ? cfg.n_gpu_layers : 100
-    args.push('-ngl', String(gpu_layers))
-    if (cfg.threads > 0) args.push('--threads', String(cfg.threads))
-    if (cfg.threads_batch > 0)
-      args.push('--threads-batch', String(cfg.threads_batch))
-    if (cfg.batch_size > 0) args.push('--batch-size', String(cfg.batch_size))
-    if (cfg.ubatch_size > 0) args.push('--ubatch-size', String(cfg.ubatch_size))
-    if (cfg.device.length > 0) args.push('--device', cfg.device)
-    if (cfg.split_mode.length > 0 && cfg.split_mode != 'layer')
-      args.push('--split-mode', cfg.split_mode)
-    if (cfg.main_gpu !== undefined && cfg.main_gpu !== 0)
-      args.push('--main-gpu', String(cfg.main_gpu))
-    // Note: Older llama.cpp versions are no longer supported
-    if (
-      cfg.flash_attn !== undefined ||
-      (cfg.flash_attn !== 'auto' && // set argument only when the setting value is not auto
-        !backend.startsWith('ik')) // ik fork of llama.cpp doesn't support --flash-attn
-    ) {
-      args.push('--flash-attn', String(cfg.flash_attn)) // default: auto = ON when supported
-    } else if (backend.startsWith('ik') && cfg.flash_attn == 'on') {
-      args.push('-fa') // hoping the ik fork is still using the old fa arguments
-    }
-
-    // Boolean flags
-    if (cfg.ctx_shift) args.push('--context-shift')
-    if (cfg.cont_batching) args.push('--cont-batching')
-    if (cfg.no_mmap) args.push('--no-mmap')
-    if (cfg.mlock) args.push('--mlock')
-    if (cfg.no_kv_offload) args.push('--no-kv-offload')
-    if (isEmbedding) {
-      args.push('--embedding')
-      args.push('--pooling', 'mean')
-    } else {
-      if (cfg.ctx_size > 0) args.push('--ctx-size', String(cfg.ctx_size))
-      if (cfg.n_predict > 0) args.push('--n-predict', String(cfg.n_predict))
-      if (cfg.cache_type_k && cfg.cache_type_k != 'f16')
-        args.push('--cache-type-k', cfg.cache_type_k)
-      if (
-        cfg.flash_attn !== 'on' &&
-        cfg.cache_type_v != 'f16' &&
-        cfg.cache_type_v != 'f32'
-      ) {
-        args.push('--cache-type-v', cfg.cache_type_v)
-      }
-      if (cfg.defrag_thold && cfg.defrag_thold != 0.1)
-        args.push('--defrag-thold', String(cfg.defrag_thold))
 
-      if (cfg.rope_scaling && cfg.rope_scaling != 'none')
-        args.push('--rope-scaling', cfg.rope_scaling)
-      if (cfg.rope_scale && cfg.rope_scale != 1)
-        args.push('--rope-scale', String(cfg.rope_scale))
-      if (cfg.rope_freq_base && cfg.rope_freq_base != 0)
-        args.push('--rope-freq-base', String(cfg.rope_freq_base))
-      if (cfg.rope_freq_scale && cfg.rope_freq_scale != 1)
-        args.push('--rope-freq-scale', String(cfg.rope_freq_scale))
+    // Resolve mmproj path if present
+    let mmprojPath: string | undefined = undefined
+    if (modelConfig.mmproj_path) {
+      mmprojPath = await joinPath([janDataFolderPath, modelConfig.mmproj_path])
     }
 
-    logger.info('Calling Tauri command llama_load with args:', args)
+    logger.info(
+      'Calling Tauri command load_llama_model with config:',
+      JSON.stringify(cfg)
+    )
     const backendPath = await getBackendExePath(backend, version)
 
     try {
       const sInfo = await loadLlamaModel(
         backendPath,
-        args,
+        modelId,
+        modelPath,
+        port,
+        cfg,
         envs,
+        mmprojPath,
         isEmbedding,
         Number(this.timeout)
       )
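For reference, `loadLlamaModel` now receives structured values instead of a pre-built argv; the flag mapping previously assembled here (for example `-ngl`, `--ctx-size`, `--mmproj`) is presumably applied behind the plugin call. A condensed, commented sketch of the new call shape, where the comments are editorial annotations grounded in the code above rather than part of the commit:

```ts
const sInfo = await loadLlamaModel(
  backendPath,         // resolved via getBackendExePath(backend, version)
  modelId,             // model alias, formerly passed as `-a <modelId>`
  modelPath,           // model file path joined with the Jan data folder
  port,                // free port picked by getRandomPort()
  cfg,                 // merged LlamacppConfig (defaults + overrideSettings)
  envs,                // LLAMA_API_KEY, LLAMA_ARG_TIMEOUT, plus user-defined vars
  mmprojPath,          // optional multimodal projector path, or undefined
  isEmbedding,         // formerly mapped to `--embedding --pooling mean`
  Number(this.timeout) // load timeout taken from the extension settings
)
```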