
Commit 698fd4b

add LORA-BIAS-7B model
1 parent 8c772f7 commit 698fd4b

File tree

4 files changed, +23 -4 lines


llama_adapter_v2_multimodal/README.md

Lines changed: 2 additions & 1 deletion
@@ -37,6 +37,7 @@ device = "cuda" if torch.cuda.is_available() else "cpu"
 
 llama_dir = "/path/to/LLaMA/"
 
+# choose from BIAS-7B, LORA-BIAS-7B
 model, preprocess = llama.load("BIAS-7B", llama_dir, device)
 model.eval()
 
@@ -72,7 +73,7 @@ import llama
 print(llama.available_models())
 ```
 
-Now we provide `BIAS-7B`, which fine-tunes the `bias` and `norm` parameters of LLaMA. We will include more pretrained models in the future, such as the LoRA fine-tuning model `LoRA-7B` and partial-tuning model `PARTIAL-7B`.
+Now we provide `BIAS-7B` which fine-tunes the `bias` and `norm` parameters of LLaMA, and `LORA-BIAS-7B` which fine-tunes the `bias`, `norm` and `lora` parameters of LLaMA. We will include more pretrained models in the future, such as the LoRA fine-tuning model `LORA-7B` and partial-tuning model `PARTIAL-7B`.
 
 ## Pre-traininig & Fine-tuning
 See [train.md](docs/train.md)
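
For reference, a minimal sketch of using the updated README snippet to load the new checkpoint. The `/path/to/LLaMA/` directory is a placeholder for your local LLaMA weights, and the model names follow the `_MODELS` table extended by this commit.

import torch
import llama

device = "cuda" if torch.cuda.is_available() else "cpu"
llama_dir = "/path/to/LLaMA/"  # placeholder: directory holding the original LLaMA weights

# list the registered checkpoints; after this commit it should include LORA-BIAS-7B
print(llama.available_models())

# choose from BIAS-7B, LORA-BIAS-7B
model, preprocess = llama.load("LORA-BIAS-7B", llama_dir, device)
model.eval()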

llama_adapter_v2_multimodal/demo.py

Lines changed: 1 addition & 0 deletions
@@ -7,6 +7,7 @@
 
 llama_dir = "/path/to/LLaMA/"
 
+# choose from BIAS-7B, LORA-BIAS-7B
 model, preprocess = llama.load("BIAS-7B", llama_dir, device)
 model.eval()
 

llama_adapter_v2_multimodal/llama/llama.py

Lines changed: 7 additions & 0 deletions
@@ -26,6 +26,7 @@ class ModelArgs:
     w_bias: bool = False  # use bias tuning
     w_lora: bool = False  # use lora tuning
     lora_rank: int = 16
+    w_new_gate: bool = False  # for compatibility
 
 
 class RMSNorm(torch.nn.Module):
@@ -125,6 +126,10 @@ def __init__(self, args: ModelArgs):
         self.cache_v = None
 
         self.gate = torch.nn.Parameter(torch.zeros(1, self.n_local_heads, 1, 1))
+
+        self.w_new_gate = args.w_new_gate
+        if args.w_new_gate:
+            self.new_gate = torch.nn.Parameter(torch.ones(1, 1, 1, 1))
 
 
     def train(self, mode: bool = True):
@@ -194,6 +199,8 @@ def forward(self, x: torch.Tensor, start_pos: int, freqs_cis: torch.Tensor, mask
             if adapter_len > 1:
                 adapter_scores = torch.matmul(xq, adapter_k.transpose(2, 3)) / math.sqrt(self.head_dim)
                 adapter_scores = self.gate.tanh() * F.softmax(adapter_scores.float(), dim=-1).type_as(xq)
+                if self.w_new_gate:
+                    adapter_scores = self.new_gate * adapter_scores
                 output = output + torch.matmul(adapter_scores, adapter_v)
             else:
                 output = output + self.gate.tanh() * adapter_v
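
A small, self-contained illustration of what the new `w_new_gate` branch does: the adapter attention scores are first gated per head by `self.gate.tanh()`, and this commit optionally rescales them by a single learnable scalar `new_gate` before they are applied to the adapter values. Tensor sizes below are illustrative assumptions, not the model's real dimensions.

import torch

# illustrative sizes (assumptions, not taken from the repo)
bsz, n_heads, seq_len, adapter_len, head_dim = 1, 32, 8, 10, 128

gate = torch.zeros(1, n_heads, 1, 1)   # per-head gate, as in self.gate
new_gate = torch.ones(1, 1, 1, 1)      # scalar gate introduced by this commit
w_new_gate = True                      # mirrors ModelArgs.w_new_gate

xq = torch.randn(bsz, n_heads, seq_len, head_dim)
adapter_k = torch.randn(bsz, n_heads, adapter_len, head_dim)
adapter_v = torch.randn(bsz, n_heads, adapter_len, head_dim)
output = torch.zeros(bsz, n_heads, seq_len, head_dim)

adapter_scores = torch.matmul(xq, adapter_k.transpose(2, 3)) / head_dim ** 0.5
adapter_scores = gate.tanh() * torch.softmax(adapter_scores.float(), dim=-1)
if w_new_gate:
    adapter_scores = new_gate * adapter_scores  # extra scalar rescaling added by the commit
output = output + torch.matmul(adapter_scores, adapter_v)
print(output.shape)  # torch.Size([1, 32, 8, 128])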

llama_adapter_v2_multimodal/llama/llama_adapter.py

Lines changed: 13 additions & 3 deletions
@@ -20,6 +20,9 @@ def __init__(self, llama_ckpt_dir, llama_tokenizer,
                  v_embed_dim=768, v_depth=8,
                  v_num_heads=16, v_mlp_ratio=4.0,
                  query_len=10, query_layer=31,
+                 w_bias=False,
+                 w_lora=False, lora_rank=16,
+                 w_new_gate=False,
                  phase="finetune"):
         super().__init__()
 
@@ -58,6 +61,9 @@ def __init__(self, llama_ckpt_dir, llama_tokenizer,
 
         # 5. llama
         model_args.w_bias = w_bias
+        model_args.w_lora = w_lora
+        model_args.lora_rank = lora_rank
+        model_args.w_new_gate = w_new_gate
         model_args.vocab_size = self.tokenizer.n_words
         torch.set_default_tensor_type(torch.cuda.HalfTensor)
         self.llama = Transformer(model_args)
@@ -270,6 +276,7 @@ def generate(
 
 _MODELS = {
     "BIAS-7B": "https://github.com/OpenGVLab/LLaMA-Adapter/releases/download/v.2.0.0/7fa55208379faf2dd862565284101b0e4a2a72114d6490a95e432cf9d9b6c813_BIAS-7B.pth",
+    "LORA-BIAS-7B": "https://github.com/OpenGVLab/LLaMA-Adapter/releases/download/v.2.0.0/1bcbffc43484332672092e0024a8699a6eb5f558161aebf98a7c6b1db67224d1_LORA-BIAS-7B.pth",
    # "LORA16-7B": "",
    # "PARTIAL-7B": ""
 }
@@ -284,10 +291,8 @@ def load(name, llama_dir, device="cuda" if torch.cuda.is_available() else "cpu",
     elif os.path.isfile(name):
         model_path = name
     else:
-        return RuntimeError(f"Model {name} not found; available models = {available_models()}")
+        return RuntimeError(f"Model {name} not found; available models = {available_models()}"), None
 
-    ckpt = torch.load(model_path, map_location='cpu')
-
     # BIAS-7B or https://xxx/sha256_BIAS-7B.pth -> 7B
     llama_type = name.split('.')[0].split('-')[-1]
     llama_ckpt_dir = os.path.join(llama_dir, llama_type)
@@ -296,6 +301,7 @@ def load(name, llama_dir, device="cuda" if torch.cuda.is_available() else "cpu",
     # load llama_adapter weights and model_cfg
     print(f'Loading LLaMA-Adapter from {model_path}')
     ckpt = torch.load(model_path, map_location='cpu')
+    model_cfg = ckpt.get('config', {})
 
     model = LLaMA_adapter(
         llama_ckpt_dir, llama_tokenzier_path,
@@ -304,6 +310,10 @@ def load(name, llama_dir, device="cuda" if torch.cuda.is_available() else "cpu",
         v_embed_dim=768, v_depth=8,
         v_num_heads=16, v_mlp_ratio=4.0,
         query_len=10, query_layer=31,
+        w_bias=model_cfg.get('w_bias', False),
+        w_lora=model_cfg.get('w_lora', False),
+        lora_rank=model_cfg.get('lora_rank', 16),
+        w_new_gate=model_cfg.get('w_lora', False),  # for compatibility
         phase=phase)
 
     load_result = model.load_state_dict(ckpt['model'], strict=False)
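
A sketch of the checkpoint layout that the reworked `load()` expects after this commit: a dict with a `'model'` state dict plus an optional `'config'` dict carrying the tuning flags, so older BIAS-7B checkpoints without a `'config'` entry fall back to the defaults. The checkpoint path below is a placeholder.

import torch

ckpt_path = "/path/to/LORA-BIAS-7B.pth"  # placeholder path to a downloaded checkpoint
ckpt = torch.load(ckpt_path, map_location="cpu")

# optional config block; missing in older BIAS-7B checkpoints, so defaults apply
model_cfg = ckpt.get("config", {})
w_bias = model_cfg.get("w_bias", False)
w_lora = model_cfg.get("w_lora", False)
lora_rank = model_cfg.get("lora_rank", 16)
w_new_gate = model_cfg.get("w_lora", False)  # reused key, "for compatibility" as in the commit

# the adapter weights themselves are loaded non-strictly on top of the LLaMA backbone
state_dict = ckpt["model"]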
