@@ -174,6 +174,9 @@ class ModelType:
174174 # llama3.1
# Naming convention visible in this group: the attribute name uses underscores
# only, while the string value keeps the 'llama3_1' prefix and joins the
# remaining parts with hyphens.
175175 llama3_1_8b = 'llama3_1-8b'
176176 llama3_1_8b_instruct = 'llama3_1-8b-instruct'
# Pre-quantized 8B instruct variants (AWQ, GPTQ int4, bitsandbytes). Each of
# these names is passed as the first argument of a register_model(...) call
# later in this file.
177+ llama3_1_8b_instruct_awq = 'llama3_1-8b-instruct-awq'
178+ llama3_1_8b_instruct_gptq_int4 = 'llama3_1-8b-instruct-gptq-int4'
179+ llama3_1_8b_instruct_bnb = 'llama3_1-8b-instruct-bnb'
177180 llama3_1_70b = 'llama3_1-70b'
178181 llama3_1_70b_instruct = 'llama3_1-70b-instruct'
179182 llama3_1_70b_instruct_fp8 = 'llama3_1-70b-instruct-fp8'
@@ -4467,6 +4470,37 @@ def get_model_tokenizer_deepseek_vl(model_dir: str,
44674470 requires = ['transformers>=4.43' ],
44684471 ignore_file_pattern = [r'.+\.pth$' ],
44694472 hf_model_id = 'meta-llama/Meta-Llama-3.1-70B' )
# Registers the bitsandbytes NF4 build of Llama 3.1 8B Instruct (per the model
# ids below) under ModelType.llama3_1_8b_instruct_bnb.
# Unlike the GPTQ and AWQ registrations that follow it, this call passes
# neither torch_dtype nor function_kwargs.
# NOTE(review): the function this decorator is stacked on lies past the end of
# this excerpt - confirm which loader it is.
4473+ @register_model (
4474+ ModelType .llama3_1_8b_instruct_bnb ,
# presumably the ModelScope model id; hf_model_id below carries the
# Hugging Face counterpart - TODO confirm against register_model's signature
4475+ 'LLM-Research/Meta-Llama-3.1-8B-Instruct-BNB-NF4' ,
# presumably the LoRA target-module preset and the chat template - TODO confirm
4476+ LoRATM .llama ,
4477+ TemplateType .llama3 ,
4478+ support_flash_attn = True ,
4479+ support_vllm = True ,
# bitsandbytes is listed as a requirement in addition to the transformers pin
4480+ requires = ['transformers>=4.43' , 'bitsandbytes' ],
4481+ hf_model_id = 'hugging-quants/Meta-Llama-3.1-8B-Instruct-BNB-NF4' )
# Registers the GPTQ INT4 build of Llama 3.1 8B Instruct (per the model ids
# below) under ModelType.llama3_1_8b_instruct_gptq_int4.
# NOTE(review): the function this decorator is stacked on lies past the end of
# this excerpt - confirm which loader it is.
4482+ @register_model (
4483+ ModelType .llama3_1_8b_instruct_gptq_int4 ,
# presumably the ModelScope model id; hf_model_id below carries the
# Hugging Face counterpart - TODO confirm against register_model's signature
4484+ 'LLM-Research/Meta-Llama-3.1-8B-Instruct-GPTQ-INT4' ,
# presumably the LoRA target-module preset and the chat template - TODO confirm
4485+ LoRATM .llama ,
4486+ TemplateType .llama3 ,
4487+ support_flash_attn = True ,
4488+ support_vllm = True ,
# auto_gptq is listed as a requirement in addition to the transformers pin
4489+ requires = ['transformers>=4.43' , 'auto_gptq' ],
# dtype is set to float16 explicitly for this checkpoint
4490+ torch_dtype = torch .float16 ,
# gptq_bits matches the INT4 in the model ids; presumably forwarded to the
# decorated loader function - TODO confirm
4491+ function_kwargs = {'gptq_bits' : 4 },
4492+ hf_model_id = 'hugging-quants/Meta-Llama-3.1-8B-Instruct-GPTQ-INT4' )
# Registers the AWQ INT4 build of Llama 3.1 8B Instruct (per the model ids
# below) under ModelType.llama3_1_8b_instruct_awq.
# NOTE(review): the function this decorator is stacked on lies past the end of
# this excerpt - confirm which loader it is.
4493+ @register_model (
4494+ ModelType .llama3_1_8b_instruct_awq ,
# presumably the ModelScope model id; hf_model_id below carries the
# Hugging Face counterpart - TODO confirm against register_model's signature
4495+ 'LLM-Research/Meta-Llama-3.1-8B-Instruct-AWQ-INT4' ,
# presumably the LoRA target-module preset and the chat template - TODO confirm
4496+ LoRATM .llama ,
4497+ TemplateType .llama3 ,
4498+ support_flash_attn = True ,
4499+ support_vllm = True ,
# autoawq is listed as a requirement in addition to the transformers pin
4500+ requires = ['transformers>=4.43' , 'autoawq' ],
# dtype is set to float16 explicitly for this checkpoint
4501+ torch_dtype = torch .float16 ,
# is_awq flags the checkpoint as AWQ; presumably forwarded to the decorated
# loader function - TODO confirm
4502+ function_kwargs = {'is_awq' : True },
4503+ hf_model_id = 'hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4' )
44704504@register_model (
44714505 ModelType .llama3_1_8b_instruct ,
44724506 'LLM-Research/Meta-Llama-3.1-8B-Instruct' ,
0 commit comments