@@ -18,6 +18,7 @@ type ChatCommand = {
     wrapper: "auto" | "general" | "llamaChat" | "chatML",
     contextSize: number,
     grammar: "text" | Parameters<typeof LlamaGrammar.getFor>[0],
+    threads: number,
     temperature: number,
     topK: number,
     topP: number,
@@ -76,6 +77,12 @@ export const ChatCommand: CommandModule<object, ChatCommand> = {
             description: "Restrict the model response to a specific grammar, like JSON for example",
             group: "Optional:"
         })
+        .option("threads", {
+            type: "number",
+            default: 6,
+            description: "Number of threads to use for the evaluation of tokens",
+            group: "Optional:"
+        })
         .option("temperature", {
             alias: "t",
             type: "number",
@@ -107,10 +114,10 @@ export const ChatCommand: CommandModule<object, ChatCommand> = {
     },
     async handler({
         model, systemInfo, systemPrompt, wrapper, contextSize, grammar,
-        temperature, topK, topP, maxTokens
+        threads, temperature, topK, topP, maxTokens
     }) {
         try {
-            await RunChat({model, systemInfo, systemPrompt, wrapper, contextSize, grammar, temperature, topK, topP, maxTokens});
+            await RunChat({model, systemInfo, systemPrompt, wrapper, contextSize, grammar, threads, temperature, topK, topP, maxTokens});
         } catch (err) {
             console.error(err);
             process.exit(1);
@@ -120,7 +127,7 @@ export const ChatCommand: CommandModule<object, ChatCommand> = {


 async function RunChat({
-    model: modelArg, systemInfo, systemPrompt, wrapper, contextSize, grammar: grammarArg, temperature, topK, topP, maxTokens
+    model: modelArg, systemInfo, systemPrompt, wrapper, contextSize, grammar: grammarArg, threads, temperature, topK, topP, maxTokens
 }: ChatCommand) {
     const {LlamaChatSession} = await import("../../llamaEvaluator/LlamaChatSession.js");
     const {LlamaModel} = await import("../../llamaEvaluator/LlamaModel.js");
@@ -130,6 +137,7 @@ async function RunChat({
     const model = new LlamaModel({
         modelPath: modelArg,
         contextSize,
+        threads,
         temperature,
         topK,
         topP
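
For context, a minimal sketch of how the new `threads` value is used programmatically. It mirrors the `LlamaModel` construction in `RunChat` above; the package-root import and the model path are assumptions for illustration and are not part of this commit.

```ts
// Sketch only: assumes LlamaModel is re-exported from the package root,
// which this diff does not show. The option names mirror RunChat above.
import {LlamaModel} from "node-llama-cpp";

const model = new LlamaModel({
    modelPath: "./models/example.gguf", // hypothetical model file
    contextSize: 4096,
    threads: 6 // same default as the new --threads CLI option
});
```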