@@ -1,3 +1,4 @@
+# Copyright (c) 2023-2025, Songlin Yang, Yu Zhang
 
 from fla.layers import (
     ABCAttention,
@@ -79,32 +80,80 @@
 )
 
 __all__ = [
-    'ABCAttention', 'ABCForCausalLM', 'ABCModel',
-    'Attention', 'TransformerForCausalLM', 'TransformerModel',
-    'BasedLinearAttention',
-    'BitAttention', 'BitNetForCausalLM', 'BitNetModel',
-    'Comba', 'CombaForCausalLM', 'CombaModel',
-    'DeltaNet', 'DeltaNetForCausalLM', 'DeltaNetModel',
-    'DeltaFormerAttention', 'DeltaFormerForCausalLM', 'DeltaFormerModel',
-    'GatedDeltaNet', 'GatedDeltaNetForCausalLM', 'GatedDeltaNetModel',
-    'GatedDeltaProduct', 'GatedDeltaProductForCausalLM', 'GatedDeltaProductModel',
-    'GatedLinearAttention', 'GLAForCausalLM', 'GLAModel',
-    'GatedSlotAttention', 'GSAForCausalLM', 'GSAModel',
-    'HGRNAttention', 'HGRNForCausalLM', 'HGRNModel',
-    'HGRN2Attention', 'HGRN2ForCausalLM', 'HGRN2Model',
-    'LightNetAttention', 'LightNetForCausalLM', 'LightNetModel',
-    'LinearAttention', 'LinearAttentionForCausalLM', 'LinearAttentionModel',
-    'LogLinearMamba2', 'LogLinearMamba2ForCausalLM', 'LogLinearMamba2Model',
-    'MesaNet', 'MesaNetForCausalLM', 'MesaNetModel',
-    'MomAttention', 'MomForCausalLM', 'MomModel',
-    'MultiheadLatentAttention', 'MLAForCausalLM', 'MLAModel',
-    'MultiScaleRetention', 'RetNetForCausalLM', 'RetNetModel',
-    'NativeSparseAttention', 'NSAForCausalLM', 'NSAModel',
-    'PaTHAttention', 'PaTHAttentionForCausalLM', 'PaTHAttentionModel',
-    'ReBasedLinearAttention',
-    'RodimusAttention', 'RodimusForCausalLM', 'RodimusModel',
-    'RWKV6Attention', 'RWKV6ForCausalLM', 'RWKV6Model',
-    'RWKV7Attention', 'RWKV7ForCausalLM', 'RWKV7Model',
+    "ABCAttention",
+    "ABCForCausalLM",
+    "ABCModel",
+    "Attention",
+    "BasedLinearAttention",
+    "BitAttention",
+    "BitNetForCausalLM",
+    "BitNetModel",
+    "Comba",
+    "CombaForCausalLM",
+    "CombaModel",
+    "DeltaFormerAttention",
+    "DeltaFormerForCausalLM",
+    "DeltaFormerModel",
+    "DeltaNet",
+    "DeltaNetForCausalLM",
+    "DeltaNetModel",
+    "GLAForCausalLM",
+    "GLAModel",
+    "GSAForCausalLM",
+    "GSAModel",
+    "GatedDeltaNet",
+    "GatedDeltaNetForCausalLM",
+    "GatedDeltaNetModel",
+    "GatedDeltaProduct",
+    "GatedDeltaProductForCausalLM",
+    "GatedDeltaProductModel",
+    "GatedLinearAttention",
+    "GatedSlotAttention",
+    "HGRN2Attention",
+    "HGRN2ForCausalLM",
+    "HGRN2Model",
+    "HGRNAttention",
+    "HGRNForCausalLM",
+    "HGRNModel",
+    "LightNetAttention",
+    "LightNetForCausalLM",
+    "LightNetModel",
+    "LinearAttention",
+    "LinearAttentionForCausalLM",
+    "LinearAttentionModel",
+    "LogLinearMamba2",
+    "LogLinearMamba2ForCausalLM",
+    "LogLinearMamba2Model",
+    "MLAForCausalLM",
+    "MLAModel",
+    "MesaNet",
+    "MesaNetForCausalLM",
+    "MesaNetModel",
+    "MomAttention",
+    "MomForCausalLM",
+    "MomModel",
+    "MultiScaleRetention",
+    "MultiheadLatentAttention",
+    "NSAForCausalLM",
+    "NSAModel",
+    "NativeSparseAttention",
+    "PaTHAttention",
+    "PaTHAttentionForCausalLM",
+    "PaTHAttentionModel",
+    "RWKV6Attention",
+    "RWKV6ForCausalLM",
+    "RWKV6Model",
+    "RWKV7Attention",
+    "RWKV7ForCausalLM",
+    "RWKV7Model",
+    "ReBasedLinearAttention",
+    "RetNetForCausalLM",
+    "RetNetModel",
+    "RodimusAttention",
+    "RodimusForCausalLM",
+    "RodimusModel",
+    "TransformerForCausalLM",
+    "TransformerModel",
 ]
 
-__version__ = '0.4.1'
+__version__ = "0.4.2"
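Every name in the restructured `__all__` above is bound at the top level of the package (via the `from fla.layers import (...)` block and its companions), so it doubles as the canonical record of the public API and as the exact set of names that `from fla import *` binds. A minimal usage sketch, assuming the `fla` package at this revision is installed; the imported names below are taken directly from the `__all__` list in the diff, nothing else is assumed:

```python
# Minimal sketch: relies only on names present in the __all__ list above,
# assuming the `fla` package at this revision is installed.
import fla

# The version bump at the bottom of the diff is visible at runtime.
print(fla.__version__)  # expected: "0.4.2"

# Names in __all__ are re-exported at the package top level, so layers and
# models can be imported directly from `fla` rather than from submodules.
from fla import DeltaNet, GatedDeltaNetForCausalLM, RWKV7Attention

# `from fla import *` would bind exactly the names enumerated in __all__.
```

Sorting the entries alphabetically, one per line, also keeps the list merge-friendly: each new architecture lands as its own set of lines instead of rewriting a grouped row.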