
Commit c79d28e

Save the model weights at a few hundred megabytes, like the officially provided BIAS-7B.pth. (#75)
* support adapter weights extraction
1 parent f628a78 commit c79d28e

File tree

3 files changed: +92, -13 lines changed

llama_adapter_v2_multimodal/docs/train.md
llama_adapter_v2_multimodal/llama/llama_adapter.py
llama_adapter_v2_multimodal/util/extract_adapter_from_checkpoint.py

llama_adapter_v2_multimodal/docs/train.md

Lines changed: 39 additions & 13 deletions
@@ -1,9 +1,10 @@
-The training process of LLaMA-Adapter V2 consists of the pre-training and fine-tuning phases.
+The training process of LLaMA-Adapter V2 consists of the pre-training and fine-tuning phases.
 
 ## Pre-training
+
 ### Data
-* We use multiple datasets with **image-text pairs** for pre-training. The texts are English-only.
 
+* We use multiple datasets with **image-text pairs** for pre-training. The texts are English-only.
 * For each dataset, the meta file should be organized in the `.csv` format as follows:
 
 ```
@@ -14,8 +15,8 @@ The training process of LLaMA-Adapter V2 consists of the pre-training and fine-t
 ```
 
 Alternatively, you may modify the [`PretrainDataset`](/data/dataset.py) implementation to adapt to your own meta file format.
-
 * Write a `.yaml` config file to specify the datasets for pre-training:
+
 ```
 META:
 - '/path/to/cc3m.csv'
@@ -25,29 +26,25 @@ The training process of LLaMA-Adapter V2 consists of the pre-training and fine-t
 
 ### Start pre-training
 
-We are now ready to start pre-training (please make sure that the original LLaMA weights are available in `/path/to/llama_model_weights`).
+We are now ready to start pre-training (please make sure that the original LLaMA weights are available in `/path/to/llama_model_weights`).
 
 ```bash
 . exps/pretrain.sh /path/to/llama_model_weights /path/to/pretrain-data-config.yaml /output/path
 ```
 
-
-
 ## Fine-tuning
 
 ### Data
 
 * We fine-tune LLaMA-Adapter V2 on text-only as well as image-text instruction following datasets.
-
 * The following lists the datasets we use for training our release weights:
 
-| Name | Link |
-| ------------------------ | ------------------------------------------------------------ |
-| alpaca_gpt4_data.json | [File Link](https://github.com/Instruction-Tuning-with-GPT-4/GPT-4-LLM/blob/main/data/alpaca_gpt4_data.json) |
+| Name | Link |
+| ------------------------ | ------------------------------------------------------------------------------------------------------------ |
+| alpaca_gpt4_data.json | [File Link](https://github.com/Instruction-Tuning-with-GPT-4/GPT-4-LLM/blob/main/data/alpaca_gpt4_data.json) |
 | alpaca_gpt4_data_zh.json | [File Link](https://github.com/Instruction-Tuning-with-GPT-4/GPT-4-LLM/blob/main/data/alpaca_gpt4_data_zh.json) |
-| llava_instruct_150k.json | [File Link](https://huggingface.co/datasets/liuhaotian/LLaVA-Instruct-150K/raw/main/llava_instruct_150k.json) |
-| alpaca_data_zh_51k.json | [File Link](https://github.com/ymcui/Chinese-LLaMA-Alpaca/blob/main/data/alpaca_data_zh_51k.json) |
-
+| llava_instruct_150k.json | [File Link](https://huggingface.co/datasets/liuhaotian/LLaVA-Instruct-150K/raw/main/llava_instruct_150k.json) |
+| alpaca_data_zh_51k.json | [File Link](https://github.com/ymcui/Chinese-LLaMA-Alpaca/blob/main/data/alpaca_data_zh_51k.json) |
 * Similar to pre-training, write a `.yaml` config file to specify the datasets for fine-tuning:
 
 ```
@@ -65,3 +62,32 @@ We are now ready to start pre-training (please make sure that the original LLaMA
 /path/to/finetune-data-config.yaml /output/path
 ```
 
+### Test and Save
+
+```python
+import os
+
+import cv2
+import torch
+from PIL import Image
+
+import llama
+from llama.llama_adapter import LLaMA_adapter
+import util.misc as misc
+import util.extract_adapter_from_checkpoint as extract
+
+device = "cuda" if torch.cuda.is_available() else "cpu"
+
+llama_dir = "path/to/llama/"
+llama_type = '7B'
+llama_ckpt_dir = os.path.join(llama_dir, llama_type)
+llama_tokenizer_path = os.path.join(llama_dir, 'tokenizer.model')
+model = LLaMA_adapter(llama_ckpt_dir, llama_tokenizer_path)
+
+# Load the fine-tuned checkpoint and run a quick sanity-check generation.
+misc.load_model(model, 'path/to/finetune/checkpoint.pth')
+model.eval()
+model.to(device)
+
+prompt = llama.format_prompt('your prompt')
+img = Image.fromarray(cv2.imread("your image"))
+img = model.clip_transform(img).unsqueeze(0).to(device)
+
+result = model.generate(img, [prompt])[0]
+print(result)
+
+# Extract and save only the adapter weights. Please end the filename with -llama_type.pth, e.g. adapter-7B.pth.
+extract.save(model, 'path/to/adapter-7B.pth', 'BIAS')
+```
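The `extract.save` call above writes a checkpoint in the same wrapped format as the released adapters, so it should be loadable the same way. Below is a minimal sketch, assuming the package's CLIP-style `llama.load` helper (normally used with the released `BIAS-7B` weights) also accepts a local checkpoint path and returns `(model, preprocess)`; check the repository's demo for the authoritative signature.

```python
# Sketch only: reload the extracted adapter for inference instead of downloading BIAS-7B.pth.
# The llama.load(...) arguments and return values are assumptions based on the repo's demo usage.
import cv2
import torch
from PIL import Image

import llama

device = "cuda" if torch.cuda.is_available() else "cpu"
llama_dir = "path/to/llama/"  # directory containing 7B/ and tokenizer.model

# Pass the locally extracted file where a released name such as "BIAS-7B" would normally go.
model, preprocess = llama.load("path/to/adapter-7B.pth", llama_dir, device=device)
model.eval()

prompt = llama.format_prompt("your prompt")
img = Image.fromarray(cv2.imread("your image"))
img = preprocess(img).unsqueeze(0).to(device)
print(model.generate(img, [prompt])[0])
```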

llama_adapter_v2_multimodal/llama/llama_adapter.py

Lines changed: 1 addition & 0 deletions
@@ -274,6 +274,7 @@ def generate(
         return decoded
 
 
+
 _MODELS = {
     "BIAS-7B": "https://github.com/OpenGVLab/LLaMA-Adapter/releases/download/v.2.0.0/7fa55208379faf2dd862565284101b0e4a2a72114d6490a95e432cf9d9b6c813_BIAS-7B.pth",
     "LORA-BIAS-7B": "https://github.com/OpenGVLab/LLaMA-Adapter/releases/download/v.2.0.0/1bcbffc43484332672092e0024a8699a6eb5f558161aebf98a7c6b1db67224d1_LORA-BIAS-7B.pth",
llama_adapter_v2_multimodal/util/extract_adapter_from_checkpoint.py

Lines changed: 52 additions & 0 deletions
@@ -0,0 +1,52 @@
+import torch
+
+def save(full_model, path, model_type='BIAS'):
+    # Collect the names of the adapter-specific parameters; the frozen LLaMA and CLIP
+    # weights are left out of the saved checkpoint.
+    if model_type == 'BIAS':
+        keys = [
+            f'visual_blocks.{i}.{key}.{suffix}'
+            for i in range(8)
+            for key in ['norm1', 'attn.qkv', 'attn.proj', 'norm2', 'mlp.fc1', 'mlp.fc2']
+            for suffix in ['weight', 'bias']
+        ] + [
+            f'llama.layers.{i}.{key}'
+            for i in range(32)
+            for key in ['attention.gate', 'attention.wq.bias', 'attention.wo.bias', 'feed_forward.w1.bias', 'feed_forward.w2.bias', 'feed_forward.w3.bias', 'attention_norm.weight', 'ffn_norm.weight']
+        ] + [
+            f'{base_key}.{suffix}'
+            for base_key in ['clip_proj_norm', 'visual_proj_norm', 'visual_proj', 'clip_proj']
+            for suffix in ['weight', 'bias']
+        ] + ['llama.norm.weight', 'visual_query.weight', 'adapter_query.weight']
+
+    elif model_type == 'LORA':
+        keys = [
+            f'visual_blocks.{i}.{key}.{suffix}'
+            for i in range(8)
+            for key in [f'norm{j}' for j in range(1, 3)] + ['attn.qkv', 'attn.proj', 'mlp.fc1', 'mlp.fc2']
+            for suffix in ['weight', 'bias']
+        ] + [
+            f'llama.layers.{i}.{key}'
+            for i in range(32)
+            for key in ['attention.gate', 'attention.wq.bias', 'attention.wo.bias', 'feed_forward.w1.bias', 'feed_forward.w2.bias', 'feed_forward.w3.bias', 'attention_norm.weight', 'ffn_norm.weight']
+            + [f'attention.lora_wk_l{j}.weight' for j in range(1, 3)]
+            + [f'attention.lora_wo_l{j}.weight' for j in range(1, 3)]
+            + [f'feed_forward.lora_w{k}_l{j}.weight' for k in range(1, 4) for j in range(1, 3)]
+            + [f'attention.lora_wq_l{j}.weight' for j in range(1, 3)]
+            + [f'attention.lora_wv_l{j}.weight' for j in range(1, 3)]
+            + ['attention.new_gate']
+        ] + [
+            f'{base_key}.{suffix}'
+            for base_key in ['clip_proj_norm', 'visual_proj_norm', 'visual_proj', 'clip_proj']
+            for suffix in ['weight', 'bias']
+        ] + ['llama.norm.weight', 'visual_query.weight', 'adapter_query.weight']
+
+    ## TODO: Add other model types
+
+    # Copy only the selected tensors out of the full model's state dict and wrap them
+    # with the config flags the loader expects.
+    full_model_state_dict = full_model.state_dict()
+    small_weights = {key: full_model_state_dict[key] for key in keys}
+    if model_type == 'BIAS':
+        wrapped_small_weights = {'model': small_weights, 'config': {'w_bias': True, 'w_lora': False, 'lora_rank': 16}}
+    elif model_type == 'LORA':
+        wrapped_small_weights = {'model': small_weights, 'config': {'w_bias': True, 'w_lora': True, 'lora_rank': 16}}
+    # Save the wrapped small weights
+    torch.save(wrapped_small_weights, path)
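As a quick check that extraction worked, the file written by `save` can be reopened directly. The sketch below relies only on the dictionary layout shown above; the paths are placeholders.

```python
# Inspect an adapter checkpoint produced by util/extract_adapter_from_checkpoint.py.
import os

import torch

path = "path/to/adapter-7B.pth"  # placeholder
ckpt = torch.load(path, map_location="cpu")

print(ckpt["config"])                 # e.g. {'w_bias': True, 'w_lora': False, 'lora_rank': 16}
print(len(ckpt["model"]), "tensors")  # adapter-related weights only, not the full model

# Rough sizes, to confirm the checkpoint lands in the hundreds of megabytes rather than gigabytes.
n_params = sum(t.numel() for t in ckpt["model"].values())
print(f"{n_params / 1e6:.1f}M adapter parameters")
print(f"{os.path.getsize(path) / 2**20:.1f} MiB on disk")
```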
