@@ -179,6 +179,8 @@ class ModelType:
     yi_1_5_6b_chat_gptq_int4 = 'yi-1_5-6b-chat-gptq-int4'
     yi_1_5_9b_chat_awq_int4 = 'yi-1_5-9b-chat-awq-int4'
     yi_1_5_9b_chat_gptq_int4 = 'yi-1_5-9b-chat-gptq-int4'
+    yi_1_5_34b_chat_awq_int4 = 'yi-1_5-34b-chat-awq-int4'
+    yi_1_5_34b_chat_gptq_int4 = 'yi-1_5-34b-chat-gptq-int4'
     yi_1_5_9b = 'yi-1_5-9b'
     yi_1_5_9b_chat = 'yi-1_5-9b-chat'
     yi_1_5_34b = 'yi-1_5-34b'
@@ -1766,6 +1768,7 @@ def cross_entropy_forward(self, inputs: Tensor, target: Tensor) -> Tensor:
     torch_dtype=torch.float16,
     function_kwargs={'is_awq': True},
     support_flash_attn=True,
+    hf_model_id='modelscope/Yi-1.5-6B-Chat-AWQ',
     support_vllm=True)
 @register_model(
     ModelType.yi_1_5_6b_chat_gptq_int4,
@@ -1776,6 +1779,7 @@ def cross_entropy_forward(self, inputs: Tensor, target: Tensor) -> Tensor:
     function_kwargs={'gptq_bits': 4},
     torch_dtype=torch.float16,
     support_flash_attn=True,
+    hf_model_id='modelscope/Yi-1.5-6B-Chat-GPTQ',
     support_vllm=True)
 @register_model(
     ModelType.yi_1_5_9b_chat_awq_int4,
@@ -1786,6 +1790,7 @@ def cross_entropy_forward(self, inputs: Tensor, target: Tensor) -> Tensor:
     torch_dtype=torch.float16,
     function_kwargs={'is_awq': True},
     support_flash_attn=True,
+    hf_model_id='modelscope/Yi-1.5-9B-Chat-AWQ',
     support_vllm=True)
 @register_model(
     ModelType.yi_1_5_9b_chat_gptq_int4,
@@ -1796,6 +1801,29 @@ def cross_entropy_forward(self, inputs: Tensor, target: Tensor) -> Tensor:
     function_kwargs={'gptq_bits': 4},
     torch_dtype=torch.float16,
     support_flash_attn=True,
+    hf_model_id='modelscope/Yi-1.5-9B-Chat-GPTQ',
+    support_vllm=True)
+@register_model(
+    ModelType.yi_1_5_34b_chat_awq_int4,
+    'AI-ModelScope/Yi-1.5-34B-Chat-AWQ',
+    LoRATM.llama2,
+    TemplateType.yi1_5,
+    requires=['autoawq'],
+    torch_dtype=torch.float16,
+    function_kwargs={'is_awq': True},
+    support_flash_attn=True,
+    hf_model_id='modelscope/Yi-1.5-34B-Chat-AWQ',
+    support_vllm=True)
+@register_model(
+    ModelType.yi_1_5_34b_chat_gptq_int4,
+    'AI-ModelScope/Yi-1.5-34B-Chat-GPTQ',
+    LoRATM.llama2,
+    TemplateType.yi1_5,
+    requires=['auto_gptq>=0.5'],
+    function_kwargs={'gptq_bits': 4},
+    torch_dtype=torch.float16,
+    support_flash_attn=True,
+    hf_model_id='modelscope/Yi-1.5-34B-Chat-GPTQ',
     support_vllm=True)
 @register_model(
     ModelType.yi_1_5_9b,
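
For context (not part of this commit): once registered, the new quantized model types can be loaded through the usual swift.llm helpers. A minimal sketch, assuming the get_model_tokenizer/get_template/inference API of this ms-swift version:

import torch
from swift.llm import (ModelType, get_default_template_type,
                       get_model_tokenizer, get_template, inference)

# Pick one of the newly registered quantized checkpoints.
model_type = ModelType.yi_1_5_34b_chat_awq_int4
template_type = get_default_template_type(model_type)  # yi1_5, per the registration

# torch.float16 matches the torch_dtype declared in the registration; the AWQ
# weights resolve via the ModelScope id 'AI-ModelScope/Yi-1.5-34B-Chat-AWQ'.
model, tokenizer = get_model_tokenizer(
    model_type, torch.float16, model_kwargs={'device_map': 'auto'})
template = get_template(template_type, tokenizer)

response, history = inference(model, template, 'Hello!')
print(response)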