|
88 | 88 | POOLING_MODEL_MAX_NUM_BATCHED_TOKENS = 32768
|
89 | 89 | MULTIMODAL_MODEL_MAX_NUM_BATCHED_TOKENS = 5120
|
90 | 90 |
|
91 |
| -# Exception strings for non-implemented encoder/decoder scenarios |
92 |
| - |
93 |
| -# Reminder: Please update docs/features/compatibility_matrix.md |
94 |
| -# If the feature combo become valid |
95 |
| - |
96 |
| -STR_NOT_IMPL_ENC_DEC_SWA = \ |
97 |
| - "Sliding window attention for encoder/decoder models " + \ |
98 |
| - "is not currently supported." |
99 |
| - |
100 |
| -STR_NOT_IMPL_ENC_DEC_PREFIX_CACHE = \ |
101 |
| - "Prefix caching for encoder/decoder models " + \ |
102 |
| - "is not currently supported." |
103 |
| - |
104 |
| -STR_NOT_IMPL_ENC_DEC_CHUNKED_PREFILL = \ |
105 |
| - "Chunked prefill for encoder/decoder models " + \ |
106 |
| - "is not currently supported." |
107 |
| - |
108 |
| -STR_NOT_IMPL_ENC_DEC_LOGIT_SOFTCAP = ( |
109 |
| - "Models with logits_soft_cap " |
110 |
| - "require FlashInfer backend, which is " |
111 |
| - "currently not supported for encoder/decoder " |
112 |
| - "models.") |
113 |
| - |
114 |
| -STR_NOT_IMPL_ENC_DEC_LORA = ("LoRA is not currently " |
115 |
| - "supported with encoder/decoder " |
116 |
| - "models.") |
117 |
| - |
118 |
| -STR_NOT_IMPL_ENC_DEC_PP = ("Pipeline parallelism is not " |
119 |
| - "currently supported with " |
120 |
| - "encoder/decoder models.") |
121 |
| - |
122 |
| -STR_NOT_IMPL_ENC_DEC_MM = ("Multimodal is not currently " |
123 |
| - "supported with encoder/decoder " |
124 |
| - "models.") |
125 |
| - |
126 |
| -STR_NOT_IMPL_ENC_DEC_SPEC_DEC = ("Speculative decoding is not " |
127 |
| - "currently supported with encoder/" |
128 |
| - "decoder models.") |
129 |
| - |
130 |
| -STR_NOT_IMPL_ENC_DEC_BACKEND = ("XFormers and Flash-Attention are the only " |
131 |
| - "backends currently supported with encoder/" |
132 |
| - "decoder models.") |
133 |
| - |
134 |
| -# Efficiently import all enc/dec error strings |
135 |
| -# rather than having to import all of the above |
136 |
| -STR_NOT_IMPL_ENC_DEC_ERR_STRS = { |
137 |
| - "STR_NOT_IMPL_ENC_DEC_SWA": STR_NOT_IMPL_ENC_DEC_SWA, |
138 |
| - "STR_NOT_IMPL_ENC_DEC_PREFIX_CACHE": STR_NOT_IMPL_ENC_DEC_PREFIX_CACHE, |
139 |
| - "STR_NOT_IMPL_ENC_DEC_CHUNKED_PREFILL": |
140 |
| - STR_NOT_IMPL_ENC_DEC_CHUNKED_PREFILL, |
141 |
| - "STR_NOT_IMPL_ENC_DEC_LOGIT_SOFTCAP": STR_NOT_IMPL_ENC_DEC_LOGIT_SOFTCAP, |
142 |
| - "STR_NOT_IMPL_ENC_DEC_LORA": STR_NOT_IMPL_ENC_DEC_LORA, |
143 |
| - "STR_NOT_IMPL_ENC_DEC_PP": STR_NOT_IMPL_ENC_DEC_PP, |
144 |
| - "STR_NOT_IMPL_ENC_DEC_MM": STR_NOT_IMPL_ENC_DEC_MM, |
145 |
| - "STR_NOT_IMPL_ENC_DEC_SPEC_DEC": STR_NOT_IMPL_ENC_DEC_SPEC_DEC, |
146 |
| - "STR_NOT_IMPL_ENC_DEC_BACKEND": STR_NOT_IMPL_ENC_DEC_BACKEND, |
147 |
| -} |
148 |
| - |
149 | 91 | # Constants related to forcing the attention backend selection
|
150 | 92 |
|
151 | 93 | # String name of register which may be set in order to
|
|
0 commit comments