Merge pull request #5 from explodinggradients/dev

shahules786 · web-flow · commit 729e21f15c95 · 2023-05-08T16:05:24.000+05:30
Fix GitHub workflows
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -38,9 +38,9 @@ jobs:
         sudo apt-get install libsndfile1
         pip install -r requirements.txt
         pip install black
-    - name: Install reward-model
-      run: |
-          pip install -e .[dev]
+    # - name: Install reward-model
+    #   run: |
+    #       pip install -e .
     - name: Run black
-      # run:
-      #   black --check .  --exclude blade2blade/__init__.py
+      run:
+        black --check .
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -0,0 +1,37 @@
+repos:
+    - repo: https://github.com/ambv/black
+      rev: 22.8.0
+      hooks:
+        - id: black
+
+    # Sort imports
+    - repo: https://github.com/PyCQA/isort
+      rev: 5.10.1
+      hooks:
+      - id: isort
+        args: ["--profile", "black"]
+
+    - repo: https://gitlab.com/pycqa/flake8
+      rev: 5.0.4
+      hooks:
+      - id: flake8
+        args: ['--ignore=E203,E501,F811,E712,W503']
+        exclude: __init__.py
+
+    # Formatting, Whitespace, etc
+    - repo: https://github.com/pre-commit/pre-commit-hooks
+      rev: v3.2.0
+      hooks:
+      - id: trailing-whitespace
+      - id: check-added-large-files
+        args: ['--maxkb=1000']
+      - id: check-ast
+      - id: check-json
+      - id: check-merge-conflict
+      - id: check-xml
+      - id: check-yaml
+      - id: debug-statements
+      - id: end-of-file-fixer
+      - id: requirements-txt-fixer
+      - id: mixed-line-ending
+        args: ['--fix=no']
diff --git a/README.md b/README.md
@@ -1,2 +1,19 @@
 # Reward-Model
-Framework for reward model for RLHF. 
+A framework for training reward models for RLHF.
+
+
+### Quick Start
+* Inference
+```python
+from transformers import AutoModelForSequenceClassification, AutoTokenizer
+MODEL = ""  # Hugging Face Hub id or local path of a trained reward model
+
+model = AutoModelForSequenceClassification.from_pretrained(MODEL)
+tokenizer = AutoTokenizer.from_pretrained(MODEL)
+
+```
+
+* Training
+```bash
+python src/training.py --config-name <your-config-name>
+```
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,9 +1,9 @@
 [project]
-name = "blade2blade"
-description = "Adversarial Training and SFT for Bot Safety Models"
+name = "Reward-Model"
+description = "Reward Model training for LLM alignment"
 version = "0.0.1"
 authors = [
-    { name = "LAION-AI", email = "contact@laion.ai" }
+    { name = "Exploding gradients", email = "shahules786@gmail.com" }
 ]
 readme = "README.md"
 dependencies = [
@@ -12,11 +12,8 @@ dependencies = [
     "transformers>=4.27.4",
     "hydra-core>=1.3.2",
     "tokenizers>=0.13.2",
-]
-
-[project.optional-dependencies]
-dev = [
     "wandb>=0.14.0",
+
 ]
 
 [build-system]
diff --git a/src/config/config.yaml b/src/config/config.yaml
@@ -18,4 +18,4 @@ datasets:
   - webgpt:
         split: "train"
 
-validation_size: 0.15
+validation_size: 0.15
diff --git a/src/datacollator.py b/src/datacollator.py
@@ -1,7 +1,8 @@
-from transformers import PreTrainedTokenizer
-import torch
 from dataclasses import dataclass
 
+import torch
+from transformers import PreTrainedTokenizer
+
 from utils import SPECIAL_TOKENS
 
 
@@ -11,10 +12,18 @@ class RMDataCollator:
     max_length: int = 512
 
     def format_prefix(self, prompts, eos):
-
-        prompts = ["{}{}{}".format(SPECIAL_TOKENS["prompter"] if i%2==0 else SPECIAL_TOKENS["assistant"], prompt, eos) for i,prompt in enumerate(prompts)]
+        prompts = [
+            "{}{}{}".format(
+                SPECIAL_TOKENS["prompter"]
+                if i % 2 == 0
+                else SPECIAL_TOKENS["assistant"],
+                prompt,
+                eos,
+            )
+            for i, prompt in enumerate(prompts)
+        ]
         return "".join(prompts)
-    
+
     def format_suffix(self, answer, eos):
         return "{}{}{}".format(SPECIAL_TOKENS["assistant"], answer, eos)
 
@@ -24,7 +33,7 @@ def process_example(self, example):
         prefix, outputs = example
         prefix = self.format_prefix(prefix, eos)
         outputs = [self.format_suffix(output, eos) for output in outputs]
-        print(prefix,outputs)
+        print(prefix, outputs)
         prefix_tokens = self.tokenizer.encode(prefix)
         input_ids, attention_masks = [], []
         for output in outputs:
diff --git a/src/dataset.py b/src/dataset.py
@@ -1,9 +1,10 @@
+import re
 from collections import defaultdict
-from torch.utils.data import Dataset
+from typing import List, Union
+
 from datasets import load_dataset
-import re
-from typing import Union, List
 from omegaconf import OmegaConf
+from torch.utils.data import Dataset
 
 
 class HFSummary(Dataset):
@@ -16,7 +17,7 @@ def __init__(self, split: Union[List[str], str] = "train"):
         if isinstance(split, OmegaConf):
             self.split = OmegaConf.to_object(split)
         else:
-            self.split = split      
+            self.split = split
         dataset = load_dataset(self.name, "axis", split=self.split)
         self.data_dict = self.prepare_axis(dataset)
         self.postids = list(self.data_dict.keys())
@@ -96,30 +97,36 @@ def __getitem__(self, idx):
 class AnthropicRLFH(Dataset):
     name = "Dahoas/full-hh-rlhf"
 
-    def __init__(self,split:Union[List[str],str]="train"):
+    def __init__(self, split: Union[List[str], str] = "train"):
         super().__init__()
         if isinstance(split, str):
             split = [split]
         if isinstance(split, OmegaConf):
             self.split = OmegaConf.to_object(split)
         else:
             self.split = split
-        dataset = load_dataset(self.name,split=self.split)
+        dataset = load_dataset(self.name, split=self.split)
         self.data_dict = defaultdict(dict)
         id = 0
         for data in dataset:
             for item in data:
-                dialogs = [text.replace("\n\n","").strip() for text in re.split(r'Human:|Assistant:',item["prompt"])]
-                dialogs = [text for text in dialogs if text!=""]
-                self.data_dict[f"prompt{id}"].update({"prompt":dialogs,
-                                           "answers":[item["chosen"], item["rejected"]]})
-                id+=1   
+                dialogs = [
+                    text.replace("\n\n", "").strip()
+                    for text in re.split(r"Human:|Assistant:", item["prompt"])
+                ]
+                dialogs = [text for text in dialogs if text != ""]
+                self.data_dict[f"prompt{id}"].update(
+                    {"prompt": dialogs, "answers": [item["chosen"], item["rejected"]]}
+                )
+                id += 1
 
         self.prompt_ids = list(self.data_dict.keys())
 
-    def __len__(self,):
+    def __len__(
+        self,
+    ):
         return len(self.prompt_ids)
-    
+
     def __getitem__(self, idx):
-        prompt, answers = self.data_dict.get(self.prompt_ids[idx],{}).values()
-        return prompt, answers
+        prompt, answers = self.data_dict.get(self.prompt_ids[idx], {}).values()
+        return prompt, answers
diff --git a/src/model.py b/src/model.py
@@ -1,14 +1,14 @@
 from dataclasses import dataclass
-from turtle import hideturtle
+
+import torch
+from torch import nn
 from transformers import (
+    AutoConfig,
+    AutoModelForSequenceClassification,
     GPTNeoXConfig,
-    GPTNeoXPreTrainedModel,
     GPTNeoXModel,
-    AutoModelForSequenceClassification,
-    AutoConfig,
+    GPTNeoXPreTrainedModel,
 )
-from torch import nn
-import torch
 from transformers.utils import ModelOutput
 
 
@@ -20,11 +20,13 @@ class GPTNeoxRMOuptput(ModelOutput):
 
     logits: torch.FloatTensor = None
 
+
 class GPTNeoXConfigRM(GPTNeoXConfig):
     model_type = "rm_gptneox_config"
+
     def __init__(
         self,
-        pooling = "last",
+        pooling="last",
         **kwargs,
     ):
         super().__init__(**kwargs)
@@ -33,7 +35,7 @@ def __init__(
 
 class GPTNeoXRM(GPTNeoXPreTrainedModel):
     config_class = GPTNeoXConfigRM
-    """ 
+    """
     Reward Model
     """
 
@@ -44,7 +46,9 @@ def __init__(
         super().__init__(config)
         self.gpt_neox = GPTNeoXModel(config)
         self.pooling = config.pooling
-        hidden_size = config.hidden_size if self.pooling != "mean-max" else config.hidden_size * 2
+        hidden_size = (
+            config.hidden_size if self.pooling != "mean-max" else config.hidden_size * 2
+        )
         self.out_layer = nn.Linear(hidden_size, 1)
 
     def forward(
@@ -74,22 +78,20 @@ def forward(
                 ) / attention_mask.sum(dim=1).unsqueeze(-1)
         elif self.pooling == "last":
             if attention_mask is None:
-                hidden_states = hidden_states[:,-1,:]
+                hidden_states = hidden_states[:, -1, :]
             else:
                 last_idx = attention_mask.cumsum(1).argmax(1)
-                last_idx = last_idx.view(-1,1,1).expand(-1,1,hidden_states.size(-1))
-                hidden_states = torch.gather(hidden_states,1,last_idx).squeeze(1)
+                last_idx = last_idx.view(-1, 1, 1).expand(-1, 1, hidden_states.size(-1))
+                hidden_states = torch.gather(hidden_states, 1, last_idx).squeeze(1)
         elif self.pooling == "mean-max":
             if attention_mask is None:
                 mean, max = hidden_states.mean(dim=1), hidden_states.max(dim=1).values
-                hidden_states = torch.cat([mean,max],1)
+                hidden_states = torch.cat([mean, max], 1)
             else:
                 mean = (hidden_states * attention_mask.unsqueeze(-1)).sum(
                     dim=1
                 ) / attention_mask.sum(dim=1).unsqueeze(-1)
-                max = (hidden_states * attention_mask.unsqueeze(-1)).max(
-                    dim=1
-                ).values
+                max = (hidden_states * attention_mask.unsqueeze(-1)).max(dim=1).values
                 hidden_states = torch.cat([mean, max], 1)
         else:
             raise ValueError(f"invalid pooling {self.pooling}")
@@ -103,4 +105,4 @@ def forward(
 
 
 AutoConfig.register("rm_gptneox_config", GPTNeoXConfigRM)
-AutoModelForSequenceClassification.register(GPTNeoXConfigRM, GPTNeoXRM)
+AutoModelForSequenceClassification.register(GPTNeoXConfigRM, GPTNeoXRM)
diff --git a/src/trainer.py b/src/trainer.py
@@ -1,13 +1,14 @@
-from typing import Any, Dict, List, Optional, Tuple, Union
-import torch
-from torch import nn
-from transformers import Trainer
+import os
+from typing import Any, Dict, List, Optional, Union
+
 import hydra
+import torch
 from hydra.utils import instantiate
 from omegaconf import DictConfig
-from datacollator import RMDataCollator
-import os
+from torch import nn
+from transformers import Trainer
 
+from datacollator import RMDataCollator
 from loss import RMLoss
 from model import GPTNeoXRM
 from utils import get_tokenizer, prepare_datasets
diff --git a/src/utils.py b/src/utils.py
@@ -1,7 +1,9 @@
-from transformers import AutoTokenizer
+import torch
+from tokenizers import pre_tokenizers
 from torch.utils.data import ConcatDataset, random_split
+from transformers import AutoTokenizer
+
 from dataset import AnthropicRLFH, HFSummary, WebGPT
-import torch
 
 SPECIAL_TOKENS = {"prompter": "|prompter|", "assistant": "|assistant|"}
 generator = torch.Generator().manual_seed(42)
@@ -50,7 +52,8 @@ def prepare_datasets(config):
 
     dataset = ConcatDataset(dataset_list)
     train_dataset, valid_dataset = random_split(
-        dataset, [1 - config.validation_size, config.validation_size],
-        generator=generator
+        dataset,
+        [1 - config.validation_size, config.validation_size],
+        generator=generator,
     )
     return train_dataset, valid_dataset