@@ -56,12 +56,14 @@ export async function interactivelyAskForModel({
56
56
llama,
57
57
modelsDirectory,
58
58
allowLocalModels = true ,
59
- downloadIntent = true
59
+ downloadIntent = true ,
60
+ flashAttention = false
60
61
} : {
61
62
llama : Llama ,
62
63
modelsDirectory ?: string ,
63
64
allowLocalModels ?: boolean ,
64
- downloadIntent ?: boolean
65
+ downloadIntent ?: boolean ,
66
+ flashAttention ?: boolean
65
67
} ) : Promise < string > {
66
68
let localModelFileOptions : ( ModelOption & { type : "localModel" } ) [ ] = [ ] ;
67
69
const recommendedModelOptions : ( ModelOption & { type : "recommendedModel" } ) [ ] = [ ] ;
@@ -112,7 +114,9 @@ export async function interactivelyAskForModel({
112
114
readItems ++ ;
113
115
progressUpdater . setProgress ( readItems / ggufFileNames . length , renderProgress ( ) ) ;
114
116
115
- const compatibilityScore = await ggufInsights ?. configurationResolver . scoreModelConfigurationCompatibility ( ) ;
117
+ const compatibilityScore = await ggufInsights ?. configurationResolver . scoreModelConfigurationCompatibility ( {
118
+ flashAttention : flashAttention && ggufInsights ?. flashAttentionSupported
119
+ } ) ;
116
120
117
121
return {
118
122
type : "localModel" ,
@@ -211,7 +215,7 @@ export async function interactivelyAskForModel({
211
215
try {
212
216
// eslint-disable-next-line no-constant-condition
213
217
while ( true ) {
214
- const minWidth = Math . min ( 80 , process . stdout . columns - 1 ) ;
218
+ const minWidth = Math . min ( 80 + ( flashAttention ? 26 : 0 ) , process . stdout . columns - 1 ) ;
215
219
const selectedItem = await basicChooseFromListConsoleInteraction ( {
216
220
title ( item , rerender ) {
217
221
const title = chalk . bold ( "Select a model:" ) + " " ;
@@ -235,6 +239,17 @@ export async function interactivelyAskForModel({
235
239
( String ( Math . floor ( ( vramState . used / vramState . total ) * 100 * 100 ) / 100 ) + "%" ) + " " +
236
240
chalk . dim ( "(" + bytes ( vramState . used ) + "/" + bytes ( vramState . total ) + ")" ) +
237
241
" "
242
+ ) + (
243
+ ! flashAttention
244
+ ? ""
245
+ : (
246
+ " " +
247
+ chalk . bgGray (
248
+ " " +
249
+ chalk . yellow ( "Flash attention:" ) + " " + "enabled" +
250
+ " "
251
+ )
252
+ )
238
253
)
239
254
) ;
240
255
@@ -273,7 +288,7 @@ export async function interactivelyAskForModel({
273
288
} ,
274
289
items : options ,
275
290
renderItem ( item , focused , rerender ) {
276
- return renderSelectionItem ( item , focused , rerender , activeInteractionController . signal , llama ) ;
291
+ return renderSelectionItem ( item , focused , rerender , activeInteractionController . signal , llama , flashAttention ) ;
277
292
} ,
278
293
canFocusItem ( item ) {
279
294
return item . type === "recommendedModel" || item . type === "localModel" || item . type === "action" ;
@@ -374,7 +389,9 @@ async function askForModelUrlOrPath(allowLocalModels: boolean): Promise<string |
374
389
) ;
375
390
}
376
391
377
- function renderSelectionItem ( item : ModelOption , focused : boolean , rerender : ( ) => void , abortSignal : AbortSignal , llama : Llama ) {
392
+ function renderSelectionItem (
393
+ item : ModelOption , focused : boolean , rerender : ( ) => void , abortSignal : AbortSignal , llama : Llama , flashAttention : boolean
394
+ ) {
378
395
if ( item . type === "localModel" ) {
379
396
let modelText = item . title instanceof Function
380
397
? item . title ( )
@@ -398,7 +415,8 @@ function renderSelectionItem(item: ModelOption, focused: boolean, rerender: () =
398
415
recommendedModelOption : item ,
399
416
abortSignal,
400
417
rerenderOption : rerender ,
401
- llama
418
+ llama,
419
+ flashAttention
402
420
} ) ;
403
421
}
404
422
@@ -542,12 +560,13 @@ function renderCompatibilityPercentageWithColors(percentage: number, {
542
560
}
543
561
544
562
async function selectFileForModelRecommendation ( {
545
- recommendedModelOption, llama, abortSignal, rerenderOption
563
+ recommendedModelOption, llama, abortSignal, rerenderOption, flashAttention
546
564
} : {
547
565
recommendedModelOption : ModelOption & { type : "recommendedModel" } ,
548
566
llama : Llama ,
549
567
abortSignal : AbortSignal ,
550
- rerenderOption ( ) : void
568
+ rerenderOption ( ) : void ,
569
+ flashAttention : boolean
551
570
} ) {
552
571
try {
553
572
let bestScore : number | undefined = undefined ;
@@ -567,7 +586,9 @@ async function selectFileForModelRecommendation({
567
586
if ( abortSignal . aborted )
568
587
return ;
569
588
570
- const compatibilityScore = await ggufInsights . configurationResolver . scoreModelConfigurationCompatibility ( ) ;
589
+ const compatibilityScore = await ggufInsights . configurationResolver . scoreModelConfigurationCompatibility ( {
590
+ flashAttention
591
+ } ) ;
571
592
572
593
if ( bestScore == null || compatibilityScore . compatibilityScore > bestScore ) {
573
594
bestScore = compatibilityScore . compatibilityScore ;
0 commit comments