1 parent 866491a commit 1a514e3
server/text_generation_server/inference_engine/hf_transformers.py
@@ -28,6 +28,7 @@ def __init__(
 
         # TODO: consider if Flash Attention should be enabled based on FLASH_ATTENTION=True
         if attn_impl := os.getenv("TRANSFORMERS_ATTN_IMPL"):
+            logger.info(f"Setting attn_implementation to {attn_impl}")
             kwargs["attn_implementation"] = attn_impl
 
         if model_config.model_type == "mpt":
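For context, a minimal sketch (not the repository's exact code) of what this change does: the TRANSFORMERS_ATTN_IMPL environment variable is read, logged, and forwarded to transformers as the attn_implementation argument. The model name, logger setup, and from_pretrained call below are illustrative assumptions about the surrounding loading path.

# Minimal sketch, assuming kwargs is later passed to from_pretrained().
import logging
import os

from transformers import AutoModelForCausalLM

logger = logging.getLogger(__name__)

kwargs = {}

# If TRANSFORMERS_ATTN_IMPL is set (e.g. "eager", "sdpa", "flash_attention_2"),
# log the chosen backend and forward it to transformers.
if attn_impl := os.getenv("TRANSFORMERS_ATTN_IMPL"):
    logger.info(f"Setting attn_implementation to {attn_impl}")
    kwargs["attn_implementation"] = attn_impl

# "gpt2" is a placeholder model name for illustration only.
model = AutoModelForCausalLM.from_pretrained("gpt2", **kwargs)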