@@ -68,6 +68,7 @@ type ChatCommand = {
6868 debug : boolean ,
6969 meter : boolean ,
7070 timing : boolean ,
71+ noMmap : boolean ,
7172 printTimings : boolean
7273} ;
7374
@@ -293,6 +294,11 @@ export const ChatCommand: CommandModule<object, ChatCommand> = {
293294 default : false ,
294295 description : "Print how how long it took to generate each response"
295296 } )
297+ . option ( "noMmap" , {
298+ type : "boolean" ,
299+ default : false ,
300+ description : "Disable mmap (memory-mapped file) usage"
301+ } )
296302 . option ( "printTimings" , {
297303 alias : "pt" ,
298304 type : "boolean" ,
@@ -306,15 +312,15 @@ export const ChatCommand: CommandModule<object, ChatCommand> = {
306312 noTrimWhitespace, grammar, jsonSchemaGrammarFile, threads, temperature, minP, topK,
307313 topP, seed, gpuLayers, repeatPenalty, lastTokensRepeatPenalty, penalizeRepeatingNewLine,
308314 repeatFrequencyPenalty, repeatPresencePenalty, maxTokens, noHistory,
309- environmentFunctions, tokenPredictionDraftModel, tokenPredictionModelContextSize, debug, meter, timing, printTimings
315+ environmentFunctions, tokenPredictionDraftModel, tokenPredictionModelContextSize, debug, meter, timing, noMmap , printTimings
310316 } ) {
311317 try {
312318 await RunChat ( {
313319 modelPath, header, gpu, systemInfo, systemPrompt, systemPromptFile, prompt, promptFile, wrapper, noJinja, contextSize,
314320 batchSize, flashAttention, noTrimWhitespace, grammar, jsonSchemaGrammarFile, threads, temperature, minP, topK, topP, seed,
315321 gpuLayers, lastTokensRepeatPenalty, repeatPenalty, penalizeRepeatingNewLine, repeatFrequencyPenalty, repeatPresencePenalty,
316322 maxTokens, noHistory, environmentFunctions, tokenPredictionDraftModel, tokenPredictionModelContextSize, debug, meter,
317- timing, printTimings
323+ timing, noMmap , printTimings
318324 } ) ;
319325 } catch ( err ) {
320326 await new Promise ( ( accept ) => setTimeout ( accept , 0 ) ) ; // wait for logs to finish printing
@@ -330,7 +336,7 @@ async function RunChat({
330336 contextSize, batchSize, flashAttention, noTrimWhitespace, grammar : grammarArg , jsonSchemaGrammarFile : jsonSchemaGrammarFilePath ,
331337 threads, temperature, minP, topK, topP, seed, gpuLayers, lastTokensRepeatPenalty, repeatPenalty, penalizeRepeatingNewLine,
332338 repeatFrequencyPenalty, repeatPresencePenalty, maxTokens, noHistory, environmentFunctions, tokenPredictionDraftModel,
333- tokenPredictionModelContextSize, debug, meter, timing, printTimings
339+ tokenPredictionModelContextSize, debug, meter, timing, noMmap , printTimings
334340} : ChatCommand ) {
335341 if ( contextSize === - 1 ) contextSize = undefined ;
336342 if ( gpuLayers === - 1 ) gpuLayers = undefined ;
@@ -353,13 +359,16 @@ async function RunChat({
353359 logLevel : llamaLogLevel
354360 } ) ;
355361 const logBatchSize = batchSize != null ;
362+ const useMmap = ! noMmap && llama . supportsMmap ;
356363
357364 const resolvedModelPath = await resolveCommandGgufPath ( modelArg , llama , headers , {
358- flashAttention
365+ flashAttention,
366+ useMmap
359367 } ) ;
360368 const resolvedDraftModelPath = ( tokenPredictionDraftModel != null && tokenPredictionDraftModel !== "" )
361369 ? await resolveCommandGgufPath ( tokenPredictionDraftModel , llama , headers , {
362370 flashAttention,
371+ useMmap,
363372 consoleTitle : "Draft model file"
364373 } )
365374 : undefined ;
@@ -404,6 +413,7 @@ async function RunChat({
404413 ? { fitContext : { contextSize} }
405414 : undefined ,
406415 defaultContextFlashAttention : flashAttention ,
416+ useMmap,
407417 ignoreMemorySafetyChecks : gpuLayers != null ,
408418 onLoadProgress ( loadProgress : number ) {
409419 progressUpdater . setProgress ( loadProgress ) ;
@@ -436,6 +446,7 @@ async function RunChat({
436446 return await llama . loadModel ( {
437447 modelPath : resolvedDraftModelPath ,
438448 defaultContextFlashAttention : flashAttention ,
449+ useMmap,
439450 onLoadProgress ( loadProgress : number ) {
440451 progressUpdater . setProgress ( loadProgress ) ;
441452 } ,
@@ -541,6 +552,7 @@ async function RunChat({
541552 const padTitle = await printCommonInfoLines ( {
542553 context,
543554 draftContext,
555+ useMmap,
544556 printBos : true ,
545557 printEos : true ,
546558 logBatchSize,
0 commit comments