1 file changed: +5 −5 lines

 import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer
+from transformers import LlamaForCausalLM, LlamaTokenizer

 MAX_NEW_TOKENS = 128
-model_name = 'decapoda-research/llama-7b-hf'
+model_name = 'meta-llama/Llama-2-7b-hf'

 text = 'Hamburg is in which country?\n'
-tokenizer = AutoTokenizer.from_pretrained(model_name)
+tokenizer = LlamaTokenizer.from_pretrained(model_name)
 input_ids = tokenizer(text, return_tensors="pt").input_ids

-free_in_GB = int(torch.cuda.mem_get_info()[0]/1024**3)
 max_memory = f'{int(torch.cuda.mem_get_info()[0]/1024**3)-2}GB'

 n_gpus = torch.cuda.device_count()
 max_memory = {i: max_memory for i in range(n_gpus)}

-model = AutoModelForCausalLM.from_pretrained(
+model = LlamaForCausalLM.from_pretrained(
     model_name,
     device_map='auto',
     load_in_8bit=True,
     max_memory=max_memory
 )
+
 generated_ids = model.generate(input_ids, max_length=MAX_NEW_TOKENS)
 print(tokenizer.decode(generated_ids[0], skip_special_tokens=True))
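
Two caveats about the updated script, offered as a hedged sketch rather than as part of the change itself: on recent transformers releases (assumption: roughly 4.38 and later), passing load_in_8bit directly to from_pretrained is deprecated in favor of a BitsAndBytesConfig, and generate's max_length caps the total sequence including the prompt, whereas max_new_tokens matches the intent of the MAX_NEW_TOKENS name. A minimal sketch under those assumptions (requires the bitsandbytes and accelerate packages):

    import torch
    from transformers import BitsAndBytesConfig, LlamaForCausalLM, LlamaTokenizer

    MAX_NEW_TOKENS = 128
    model_name = 'meta-llama/Llama-2-7b-hf'

    tokenizer = LlamaTokenizer.from_pretrained(model_name)

    # Leave ~2 GB of headroom per GPU, as in the diff above.
    per_gpu = f'{int(torch.cuda.mem_get_info()[0] / 1024 ** 3) - 2}GB'
    max_memory = {i: per_gpu for i in range(torch.cuda.device_count())}

    model = LlamaForCausalLM.from_pretrained(
        model_name,
        device_map='auto',
        # Assumption: quantization_config replaces the deprecated load_in_8bit kwarg.
        quantization_config=BitsAndBytesConfig(load_in_8bit=True),
        max_memory=max_memory,
    )

    input_ids = tokenizer('Hamburg is in which country?\n', return_tensors='pt').input_ids
    input_ids = input_ids.to(model.device)

    # max_new_tokens bounds only the generated continuation, not prompt + output.
    generated_ids = model.generate(input_ids, max_new_tokens=MAX_NEW_TOKENS)
    print(tokenizer.decode(generated_ids[0], skip_special_tokens=True))

The max_memory budgeting and device_map='auto' placement are copied from the diff unchanged; only the quantization entry point and the generation length argument differ.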