1 parent 866491a commit 1a514e3
server/text_generation_server/inference_engine/hf_transformers.py
@@ -28,6 +28,7 @@ def __init__(
 
         # TODO: consider if Flash Attention should be enabled based on FLASH_ATTENTION=True
         if attn_impl := os.getenv("TRANSFORMERS_ATTN_IMPL"):
+            logger.info(f"Setting attn_implementation to {attn_impl}")
             kwargs["attn_implementation"] = attn_impl
 
         if model_config.model_type == "mpt":
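For context, a minimal sketch (not the repository's exact code) of what this change does: the TRANSFORMERS_ATTN_IMPL environment variable is read, logged, and forwarded to transformers as the attn_implementation argument. The model name, logger setup, and from_pretrained call below are illustrative assumptions about the surrounding loading path.

# Minimal sketch, assuming kwargs is later passed to from_pretrained().
import logging
import os

from transformers import AutoModelForCausalLM

logger = logging.getLogger(__name__)

kwargs = {}

# If TRANSFORMERS_ATTN_IMPL is set (e.g. "eager", "sdpa", "flash_attention_2"),
# log the chosen backend and forward it to transformers.
if attn_impl := os.getenv("TRANSFORMERS_ATTN_IMPL"):
    logger.info(f"Setting attn_implementation to {attn_impl}")
    kwargs["attn_implementation"] = attn_impl

# "gpt2" is a placeholder model name for illustration only.
model = AutoModelForCausalLM.from_pretrained("gpt2", **kwargs)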