@@ -423,6 +423,10 @@ impl CandleBackend {
423
423
if dtype != DType :: F16
424
424
|| !cfg ! ( feature = "flash-attn" )
425
425
|| get_runtime_compute_cap ( ) . unwrap ( ) < 80
426
+ || & std:: env:: var ( "USE_FLASH_ATTENTION" )
427
+ . unwrap_or ( "True" . to_string ( ) )
428
+ . to_lowercase ( )
429
+ != "true"
426
430
{
427
431
return Err ( BackendError :: Start ( "Mistral is only supported on Cuda devices in fp16 with flash attention v2 enabled" . to_string ( ) ) ) ;
428
432
}
@@ -435,6 +439,10 @@ impl CandleBackend {
435
439
( Config :: Gte ( config) , Device :: Cuda ( _) ) => {
436
440
if dtype != DType :: F16
437
441
|| !cfg ! ( any( feature = "flash-attn" , feature = "flash-attn-v1" ) )
442
+ || & std:: env:: var ( "USE_FLASH_ATTENTION" )
443
+ . unwrap_or ( "True" . to_string ( ) )
444
+ . to_lowercase ( )
445
+ != "true"
438
446
{
439
447
tracing:: info!( "Starting GTE model on {:?}" , device) ;
440
448
Ok ( Box :: new ( GTEModel :: load ( vb, & config, model_type) . s ( ) ?) )
@@ -447,6 +455,10 @@ impl CandleBackend {
447
455
( Config :: Qwen2 ( config) , Device :: Cuda ( _) ) => {
448
456
if dtype != DType :: F16
449
457
|| !cfg ! ( any( feature = "flash-attn" , feature = "flash-attn-v1" ) )
458
+ || & std:: env:: var ( "USE_FLASH_ATTENTION" )
459
+ . unwrap_or ( "True" . to_string ( ) )
460
+ . to_lowercase ( )
461
+ != "true"
450
462
{
451
463
return Err ( BackendError :: Start ( "Qwen2 is only supported on Cuda devices in fp16 with flash attention v2 enabled" . to_string ( ) ) ) ;
452
464
}
@@ -459,6 +471,10 @@ impl CandleBackend {
459
471
( Config :: Qwen3 ( config) , Device :: Cuda ( _) ) => {
460
472
if dtype != DType :: F16
461
473
|| !cfg ! ( any( feature = "flash-attn" , feature = "flash-attn-v1" ) )
474
+ || & std:: env:: var ( "USE_FLASH_ATTENTION" )
475
+ . unwrap_or ( "True" . to_string ( ) )
476
+ . to_lowercase ( )
477
+ != "true"
462
478
{
463
479
tracing:: info!( "Starting Qwen3 model on {:?}" , device) ;
464
480
Ok ( Box :: new ( Qwen3Model :: load ( vb, & config, model_type) . s ( ) ?) )
0 commit comments