|
88 | 88 | POOLING_MODEL_MAX_NUM_BATCHED_TOKENS = 32768 |
89 | 89 | MULTIMODAL_MODEL_MAX_NUM_BATCHED_TOKENS = 5120 |
90 | 90 |
|
91 | | -# Exception strings for non-implemented encoder/decoder scenarios |
92 | | - |
93 | | -# Reminder: Please update docs/features/compatibility_matrix.md |
94 | | -# If the feature combo become valid |
95 | | - |
96 | | -STR_NOT_IMPL_ENC_DEC_SWA = \ |
97 | | - "Sliding window attention for encoder/decoder models " + \ |
98 | | - "is not currently supported." |
99 | | - |
100 | | -STR_NOT_IMPL_ENC_DEC_PREFIX_CACHE = \ |
101 | | - "Prefix caching for encoder/decoder models " + \ |
102 | | - "is not currently supported." |
103 | | - |
104 | | -STR_NOT_IMPL_ENC_DEC_CHUNKED_PREFILL = \ |
105 | | - "Chunked prefill for encoder/decoder models " + \ |
106 | | - "is not currently supported." |
107 | | - |
108 | | -STR_NOT_IMPL_ENC_DEC_LOGIT_SOFTCAP = ( |
109 | | - "Models with logits_soft_cap " |
110 | | - "require FlashInfer backend, which is " |
111 | | - "currently not supported for encoder/decoder " |
112 | | - "models.") |
113 | | - |
114 | | -STR_NOT_IMPL_ENC_DEC_LORA = ("LoRA is not currently " |
115 | | - "supported with encoder/decoder " |
116 | | - "models.") |
117 | | - |
118 | | -STR_NOT_IMPL_ENC_DEC_PP = ("Pipeline parallelism is not " |
119 | | - "currently supported with " |
120 | | - "encoder/decoder models.") |
121 | | - |
122 | | -STR_NOT_IMPL_ENC_DEC_MM = ("Multimodal is not currently " |
123 | | - "supported with encoder/decoder " |
124 | | - "models.") |
125 | | - |
126 | | -STR_NOT_IMPL_ENC_DEC_SPEC_DEC = ("Speculative decoding is not " |
127 | | - "currently supported with encoder/" |
128 | | - "decoder models.") |
129 | | - |
130 | | -STR_NOT_IMPL_ENC_DEC_BACKEND = ("XFormers and Flash-Attention are the only " |
131 | | - "backends currently supported with encoder/" |
132 | | - "decoder models.") |
133 | | - |
134 | | -# Efficiently import all enc/dec error strings |
135 | | -# rather than having to import all of the above |
136 | | -STR_NOT_IMPL_ENC_DEC_ERR_STRS = { |
137 | | - "STR_NOT_IMPL_ENC_DEC_SWA": STR_NOT_IMPL_ENC_DEC_SWA, |
138 | | - "STR_NOT_IMPL_ENC_DEC_PREFIX_CACHE": STR_NOT_IMPL_ENC_DEC_PREFIX_CACHE, |
139 | | - "STR_NOT_IMPL_ENC_DEC_CHUNKED_PREFILL": |
140 | | - STR_NOT_IMPL_ENC_DEC_CHUNKED_PREFILL, |
141 | | - "STR_NOT_IMPL_ENC_DEC_LOGIT_SOFTCAP": STR_NOT_IMPL_ENC_DEC_LOGIT_SOFTCAP, |
142 | | - "STR_NOT_IMPL_ENC_DEC_LORA": STR_NOT_IMPL_ENC_DEC_LORA, |
143 | | - "STR_NOT_IMPL_ENC_DEC_PP": STR_NOT_IMPL_ENC_DEC_PP, |
144 | | - "STR_NOT_IMPL_ENC_DEC_MM": STR_NOT_IMPL_ENC_DEC_MM, |
145 | | - "STR_NOT_IMPL_ENC_DEC_SPEC_DEC": STR_NOT_IMPL_ENC_DEC_SPEC_DEC, |
146 | | - "STR_NOT_IMPL_ENC_DEC_BACKEND": STR_NOT_IMPL_ENC_DEC_BACKEND, |
147 | | -} |
148 | | - |
149 | 91 | # Constants related to forcing the attention backend selection |
150 | 92 |
|
151 | 93 | # String name of register which may be set in order to |
|
0 commit comments