diff --git a/docs/how-to/llms/local.md b/docs/how-to/llms/local.md
index fec271e7..7eb1d487 100644
--- a/docs/how-to/llms/local.md
+++ b/docs/how-to/llms/local.md
@@ -17,7 +17,7 @@ First, set up your environment to use a Hugging Face model.
 ```python
 import os
 
-from dbally.llms.localllm import LocalLLM
+from dbally.llms.local import LocalLLM
 
 os.environ["HUGGINGFACE_API_KEY"] = "your-api-key"
 
@@ -34,6 +34,8 @@ response = await my_collection.ask("Which LLM should I use?")
 
 ## Advanced Usage
 
+### Customizing LLM options
+
 For advanced users, you can customize your LLM using [`LocalLLMOptions`](../../reference/llms/local.md#dbally.llms.clients.local.LocalLLMOptions). Here is a list of available parameters:
 
 - `repetition_penalty`: *float or null (optional)* - Penalizes repeated tokens to avoid repetitions.
@@ -48,7 +50,7 @@ For advanced users, you can customize your LLM using [`LocalLLMOptions`](../../r
 ```python
 import dbally
 
-from dbally.llms.clients.localllm import LocalLLMOptions
+from dbally.llms.clients.local import LocalLLMOptions
 
 llm = LocalLLM("meta-llama/Meta-Llama-3-8B-Instruct", default_options=LocalLLMOptions(temperature=0.7))
 my_collection = dbally.create_collection("my_collection", llm)
@@ -63,4 +65,21 @@ response = await my_collection.ask(
         temperature=0.65,
     ),
 )
+```
+
+### Using LoRA Adapters
+
+To use a LoRA adapter with `LocalLLM`, specify the `adapter_name` parameter when creating the instance. It can be either the model id of a PEFT configuration hosted inside a model repo on the Hugging Face Hub, or a path to a directory containing a PEFT configuration file.
+
+```python
+
+import os
+from dbally.llms.local import LocalLLM
+
+os.environ["HUGGINGFACE_API_KEY"] = "your-api-key"
+
+llm = LocalLLM(
+    model_name="meta-llama/Meta-Llama-3-8B-Instruct",
+    adapter_name="path/to/your/adapter"
+)
 ```
\ No newline at end of file
diff --git a/finetuning/README.md b/finetuning/README.md
new file mode 100644
index 00000000..84f70ccd
--- /dev/null
+++ b/finetuning/README.md
@@ -0,0 +1,80 @@
+# How-To: Fine-tune IQL LLM
+
+This section provides a step-by-step guide to fine-tuning an IQL LLM.
+
+## Prerequisites
+
+Before you start, install the required dependencies for fine-tuning LLMs.
+
+```bash
+pip install dbally[finetuning]
+```
+
+## Customizing the fine-tuning
+
+You can customize various aspects of the fine-tuning process by modifying the config files stored in `finetuning/dbally_finetuning/configs`.
+
+Here is an example structure of the `config.yaml` file:
+```yaml
+name:
+defaults:
+  - model:
+  - train_params:
+  - lora_params:
+  - qlora_params:
+  - _self_
+
+dataset:
+
+use_lora:
+use_qlora:
+
+output_dir:
+seed:
+env_file_path:
+overwrite_output_dir:
+neptune_enabled:
+```
+
+The key sections you might want to adjust are `model`, `train_params`, `lora_params`, and `qlora_params`.
+
+### Training parameters (`train_params`)
+
+The `train_params` section should correspond to [`TrainingArguments`](https://huggingface.co/docs/transformers/en/main_classes/trainer#transformers.TrainingArguments). These parameters control the training process, including learning rate, batch size, number of epochs, and more.
+
+### LoRA parameters (`lora_params`)
+
+The `lora_params` section should correspond to [`PeftConfig`](https://huggingface.co/docs/peft/en/package_reference/config#peft.PeftConfig). These parameters control the Low-Rank Adaptation (LoRA) configuration, which helps in fine-tuning large language models efficiently.
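+
+For reference, the baseline LoRA configuration shipped with this PR in `finetuning/dbally_finetuning/configs/lora_params/baseline.yaml` looks like this and can serve as a starting point for your own experiments:
+
+```yaml
+r: 64
+lora_alpha: 16
+lora_dropout: 0.1
+bias: "none"
+task_type: "CAUSAL_LM"
+```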
+
+### QLoRA parameters (`qlora_params`)
+
+The `qlora_params` section should correspond to [`BitsAndBytesConfig`](https://huggingface.co/docs/transformers/main_classes/quantization#transformers.BitsAndBytesConfig). These parameters control the Quantized LoRA (QLoRA) configuration, which allows for training and inference with quantized weights, reducing memory usage and computational requirements.
+
+### Model configuration (`model`)
+This section defines the model architecture and related parameters. Key elements to include are:
+
+- `name`: The name or path of the pre-trained model to fine-tune, such as "meta-llama/Meta-Llama-3-8B-Instruct".
+- `lora_target_modules`: List of model modules to which LoRA will be applied, for example, ["q_proj", "k_proj", "v_proj", "o_proj"].
+- `torch_dtype`: Data type for model parameters during training, such as bfloat16.
+- `context_length`: Maximum context length for the model, e.g., 2048.
+
+## Using Neptune for Experiment Tracking
+
+[Neptune](https://neptune.ai/) helps you track and log your experiment metrics, parameters, and other metadata in a centralized location. To enable experiment tracking with Neptune, you need to configure the necessary environment variables.
+
+Ensure you have the following environment variables set:
+
+- `NEPTUNE_API_TOKEN`: Your Neptune API token.
+- `NEPTUNE_PROJECT`: The name of your Neptune project.
+
+You can set these variables in your environment or load them from a `.env` file.
+
+## Running the Script
+
+Execute the script from the repository root directory using the following command:
+
+```bash
+PYTHONPATH=finetuning python finetuning/dbally_finetuning/train.py
+```
+
+This command runs the fine-tuning process with the specified configuration, and the outputs will be written to a timestamped subdirectory of the specified `output_dir`.
diff --git a/finetuning/dbally_finetuning/__init__.py b/finetuning/dbally_finetuning/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/finetuning/dbally_finetuning/callbacks/neptune_callback.py b/finetuning/dbally_finetuning/callbacks/neptune_callback.py
new file mode 100644
index 00000000..a811b5e9
--- /dev/null
+++ b/finetuning/dbally_finetuning/callbacks/neptune_callback.py
@@ -0,0 +1,46 @@
+import os
+from typing import List, Optional, Tuple
+
+from loguru import logger
+from omegaconf import DictConfig
+from transformers.integrations import NeptuneCallback
+
+
+def get_neptune_token_and_project_set() -> Tuple[Optional[str], Optional[str]]:
+    """
+    Loads the Neptune token and project name from environment variables.
+
+    Returns:
+        Neptune token and project values.
+    """
+
+    neptune_token = os.getenv("NEPTUNE_API_TOKEN")
+    neptune_project_name = os.getenv("NEPTUNE_PROJECT")
+
+    if neptune_token is None:
+        logger.info("neptune token not found")
+
+    if neptune_project_name is None:
+        logger.info("neptune project name not found")
+
+    return neptune_token, neptune_project_name
+
+
+def create_neptune_callback(config: DictConfig, tags: Optional[List[str]] = None) -> Optional[NeptuneCallback]:
+    """
+    Args:
+        config: DictConfig with experiment configuration.
+        tags: Optional tags to be stored in experiments metadata.
+
+    Returns:
+        Neptune callback, or None if the Neptune environment variables are not set.
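+
+    Example (illustrative; mirrors how the callback is created in IQLTrainer):
+        >>> callback = create_neptune_callback(config, tags=["meta-llama/Meta-Llama-3-8B-Instruct", "lora"])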
+    """
+
+    neptune_token, neptune_project_name = get_neptune_token_and_project_set()
+
+    if neptune_token is not None and neptune_project_name is not None:
+        neptune_callback = NeptuneCallback(project=neptune_project_name, api_token=neptune_token, tags=tags)
+        neptune_callback.config = config
+        return neptune_callback
+    logger.warning("Neptune environment variables not set properly. Neptune won't be used for this experiment.")
+    return None
diff --git a/finetuning/dbally_finetuning/configs/config.yaml b/finetuning/dbally_finetuning/configs/config.yaml
new file mode 100644
index 00000000..f78a781c
--- /dev/null
+++ b/finetuning/dbally_finetuning/configs/config.yaml
@@ -0,0 +1,18 @@
+name:
+defaults:
+  - model: llama-3-8b-instruct
+  - train_params: baseline
+  - lora_params: baseline
+  - qlora_params: baseline
+  - _self_
+
+dataset: dsai-alicja-kotyla/text-to-iql-v2
+
+use_lora: true
+use_qlora: true
+
+output_dir: reports
+seed: 1234
+env_file_path: "x.env"
+overwrite_output_dir: true
+neptune_enabled: false
diff --git a/finetuning/dbally_finetuning/configs/lora_params/baseline.yaml b/finetuning/dbally_finetuning/configs/lora_params/baseline.yaml
new file mode 100644
index 00000000..b127677d
--- /dev/null
+++ b/finetuning/dbally_finetuning/configs/lora_params/baseline.yaml
@@ -0,0 +1,5 @@
+r: 64
+lora_alpha: 16
+lora_dropout: 0.1
+bias: "none"
+task_type: "CAUSAL_LM"
\ No newline at end of file
diff --git a/finetuning/dbally_finetuning/configs/model/llama-3-8b-instruct.yaml b/finetuning/dbally_finetuning/configs/model/llama-3-8b-instruct.yaml
new file mode 100644
index 00000000..b8d1d9fe
--- /dev/null
+++ b/finetuning/dbally_finetuning/configs/model/llama-3-8b-instruct.yaml
@@ -0,0 +1,4 @@
+name: meta-llama/Meta-Llama-3-8B-Instruct
+lora_target_modules: ["q_proj", "k_proj", "v_proj", "o_proj"]
+torch_dtype: bfloat16
+context_length: 2048
diff --git a/finetuning/dbally_finetuning/configs/qlora_params/baseline.yaml b/finetuning/dbally_finetuning/configs/qlora_params/baseline.yaml
new file mode 100644
index 00000000..221e6567
--- /dev/null
+++ b/finetuning/dbally_finetuning/configs/qlora_params/baseline.yaml
@@ -0,0 +1,3 @@
+load_in_4bit: true
+bnb_4bit_use_double_quant: true
+bnb_4bit_quant_type: "nf4"
\ No newline at end of file
diff --git a/finetuning/dbally_finetuning/configs/train_params/baseline.yaml b/finetuning/dbally_finetuning/configs/train_params/baseline.yaml
new file mode 100644
index 00000000..eed3cc50
--- /dev/null
+++ b/finetuning/dbally_finetuning/configs/train_params/baseline.yaml
@@ -0,0 +1,20 @@
+learning_rate: 2e-05
+per_device_train_batch_size: 8
+gradient_accumulation_steps: 1
+num_train_epochs: 2
+lr_scheduler_type: "cosine"
+logging_steps: 10
+bf16: true
+fp16: false
+gradient_checkpointing: true
+logging_strategy: "steps"
+max_steps: -1
+output_dir: "output"
+seed: 42
+warmup_steps: 24
+save_strategy: "epoch"
+save_total_limit: -1
+do_eval: true
+evaluation_strategy: "steps"
+eval_steps: 40
+per_device_eval_batch_size: 8
\ No newline at end of file
diff --git a/finetuning/dbally_finetuning/constants.py b/finetuning/dbally_finetuning/constants.py
new file mode 100644
index 00000000..4efe4da4
--- /dev/null
+++ b/finetuning/dbally_finetuning/constants.py
@@ -0,0 +1,23 @@
+import enum
+from typing import Dict
+
+import torch
+
+
+class DataType(enum.Enum):
+    """
+    Enum representing the torch.dtype used to load Hugging Face models.
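+
+    Example (illustrative):
+        >>> DTYPES_MAPPING[DataType("bfloat16")]
+        torch.bfloat16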
+    """
+
+    FLOAT16 = "float16"
+    FLOAT32 = "float32"
+    BFLOAT16 = "bfloat16"
+
+
+DTYPES_MAPPING: Dict[DataType, torch.dtype] = {
+    DataType.FLOAT16: torch.float16,
+    DataType.FLOAT32: torch.float32,
+    DataType.BFLOAT16: torch.bfloat16,
+}
+
+DATASET_TEXT_FIELD = "messages"
diff --git a/finetuning/dbally_finetuning/paths.py b/finetuning/dbally_finetuning/paths.py
new file mode 100644
index 00000000..53e2ede2
--- /dev/null
+++ b/finetuning/dbally_finetuning/paths.py
@@ -0,0 +1,7 @@
+"""Module to store useful paths."""
+from pathlib import Path
+
+import dbally_finetuning
+
+PATH_SRC = Path(dbally_finetuning.__file__).parents[0]
+PATH_CONFIG = PATH_SRC / "configs"
diff --git a/finetuning/dbally_finetuning/preprocessing/preprocessor.py b/finetuning/dbally_finetuning/preprocessing/preprocessor.py
new file mode 100644
index 00000000..ddbf5810
--- /dev/null
+++ b/finetuning/dbally_finetuning/preprocessing/preprocessor.py
@@ -0,0 +1,49 @@
+from typing import Optional
+
+from datasets import Dataset
+from dbally_finetuning.constants import DATASET_TEXT_FIELD
+from dbally_finetuning.prompt import IQL_GENERATION_TEMPLATE, IQLGenerationPromptFormat
+from transformers import PreTrainedTokenizer
+
+from dbally.prompt.template import PromptTemplate
+
+
+class Preprocessor:
+    """Preprocessor that formats IQL dataset examples into chat-templated text."""
+
+    def __init__(
+        self, tokenizer: PreTrainedTokenizer, prompt_template: Optional[PromptTemplate[IQLGenerationPromptFormat]]
+    ):
+        self.tokenizer: PreTrainedTokenizer = tokenizer
+        self._prompt_template = prompt_template or IQL_GENERATION_TEMPLATE
+
+    def _process_example(self, example: dict):
+        prompt_format = IQLGenerationPromptFormat(
+            question=example["question"],
+            iql_context=example["iql_context"],
+            iql=example["iql"],
+        )
+        formatted_prompt = self._prompt_template.format_prompt(prompt_format)
+
+        return formatted_prompt.chat
+
+    def process(
+        self,
+        dataset: Dataset,
+    ) -> Dataset:
+        """
+        Returns the dataset with the chat-template-formatted input for the model.
+
+        Args:
+            dataset: Dataset to process.
+
+        Returns:
+            Processed dataset.
+        """
+
+        processed_input = [self._process_example(example) for example in dataset]
+
+        processed_input = self.tokenizer.apply_chat_template(
+            processed_input, tokenize=False, add_generation_prompt=False
+        )
+        return Dataset.from_dict({DATASET_TEXT_FIELD: processed_input})
diff --git a/finetuning/dbally_finetuning/prompt.py b/finetuning/dbally_finetuning/prompt.py
new file mode 100644
index 00000000..7d0526a9
--- /dev/null
+++ b/finetuning/dbally_finetuning/prompt.py
@@ -0,0 +1,54 @@
+# pylint: disable=C0301
+
+from typing import List
+
+from dbally.prompt.template import PromptFormat, PromptTemplate
+
+
+class IQLGenerationPromptFormat(PromptFormat):
+    """
+    IQL generation prompt format, providing a question, IQL context, and target IQL to be used in the conversation.
+    """
+
+    def __init__(
+        self,
+        *,
+        question: str,
+        iql: str,
+        iql_context: List[str],
+    ) -> None:
+        """
+        Constructs a new IQLGenerationPromptFormat instance.
+
+        Args:
+            question: Question to be asked.
+            iql: Target IQL query for the question.
+            iql_context: List of IQL context entries (e.g. filters or actions) to be used in the prompt.
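+
+        Example (illustrative values):
+            >>> prompt_format = IQLGenerationPromptFormat(
+            ...     question="Which LLM should I use?",
+            ...     iql='filter_by_name("Llama")',
+            ...     iql_context=["filter_by_name(name: str) - filters LLMs by name"],
+            ... )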
+        """
+        super().__init__()
+        self.question = question
+        self.iql_context = "\n".join([str(iql_context) for iql_context in iql_context])
+        self.iql = iql
+
+
+IQL_GENERATION_TEMPLATE = PromptTemplate[IQLGenerationPromptFormat](
+    [
+        {
+            "role": "system",
+            "content": "You have access to API that lets you query a database:\n"
+            "\n{iql_context}\n"
+            "Please suggest which one(s) to call and how they should be joined with logic operators (AND, OR, NOT).\n"
+            "Remember! Don't give any comments, just the function calls.\n"
+            "The output will look like this:\n"
+            'filter1("arg1") AND (NOT filter2(120) OR filter3(True))\n'
+            "DO NOT INCLUDE arguments names in your response. Only the values.\n"
+            "You MUST use only these methods:\n"
+            "\n{iql_context}\n"
+            "It is VERY IMPORTANT not to use methods other than those listed above."
+            "If you DON'T KNOW HOW TO ANSWER DON'T SAY \"\", SAY: `UNSUPPORTED QUERY` INSTEAD! "
+            "This is CRUCIAL, otherwise the system will crash.",
+        },
+        {"role": "user", "content": "{question}"},
+        {"role": "assistant", "content": "{iql}"},
+    ]
+)
diff --git a/finetuning/dbally_finetuning/train.py b/finetuning/dbally_finetuning/train.py
new file mode 100644
index 00000000..2196616d
--- /dev/null
+++ b/finetuning/dbally_finetuning/train.py
@@ -0,0 +1,20 @@
+# pylint: disable=C0116
+import os
+from datetime import datetime
+
+import hydra
+from dbally_finetuning.paths import PATH_CONFIG
+from dbally_finetuning.trainer.iql_trainer import IQLTrainer
+
+
+@hydra.main(config_name="config", config_path=str(PATH_CONFIG), version_base=None)
+def main(config):
+    output_dir = os.path.join(config.output_dir, datetime.now().strftime("%Y-%m-%d_%H-%M-%S"))
+    os.makedirs(output_dir, exist_ok=True)
+
+    iql_trainer = IQLTrainer(config, output_dir)
+    iql_trainer.finetune()
+
+
+if __name__ == "__main__":
+    main()  # pylint: disable=E1120
diff --git a/finetuning/dbally_finetuning/trainer/iql_trainer.py b/finetuning/dbally_finetuning/trainer/iql_trainer.py
new file mode 100644
index 00000000..5503f390
--- /dev/null
+++ b/finetuning/dbally_finetuning/trainer/iql_trainer.py
@@ -0,0 +1,96 @@
+from typing import Dict, Optional
+
+from datasets import Dataset, load_dataset
+from dbally_finetuning.callbacks.neptune_callback import create_neptune_callback
+from dbally_finetuning.constants import DATASET_TEXT_FIELD, DTYPES_MAPPING, DataType
+from dbally_finetuning.preprocessing.preprocessor import Preprocessor
+from dbally_finetuning.prompt import IQL_GENERATION_TEMPLATE
+from dotenv import load_dotenv
+from omegaconf import DictConfig
+from peft import LoraConfig
+from transformers import AutoTokenizer, BitsAndBytesConfig, PreTrainedTokenizer, TrainingArguments
+from transformers.integrations import NeptuneCallback
+from trl import SFTTrainer
+
+
+class IQLTrainer:
+    """
+    IQLTrainer is responsible for setting up and managing the fine-tuning of an IQL causal language model.
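+
+    Example (mirrors the usage in train.py; `config` is a Hydra DictConfig):
+        >>> trainer = IQLTrainer(config, output_dir)
+        >>> trainer.finetune()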
+    """
+
+    def __init__(self, config: DictConfig, output_dir: str):
+        self.config: DictConfig = config
+        self.output_dir = output_dir
+        load_dotenv(config.env_file_path)
+
+        self._set_tokenizer()
+        self._set_processor()
+
+        self._load_dataset()
+        self._prepare_dataset()
+
+        self._set_neptune_callback()
+
+        self._set_trainer()
+
+    def _load_dataset(self) -> None:
+        self._dataset = load_dataset(self.config.dataset)
+
+    def _prepare_dataset(self) -> None:
+        self._processed_dataset: Dict[str, Dataset] = {}
+        for split in self._dataset:
+            self._processed_dataset[split] = self._processor.process(self._dataset[split])
+
+    def _set_processor(self) -> None:
+        self._processor: Preprocessor = Preprocessor(self._tokenizer, IQL_GENERATION_TEMPLATE)
+
+    def _set_tokenizer(self) -> None:
+        self._tokenizer: PreTrainedTokenizer = AutoTokenizer.from_pretrained(self.config.model.name)
+        self._tokenizer.model_max_length = self.config.model.context_length
+        if self._tokenizer.pad_token_id is None:
+            self._tokenizer.pad_token_id = self._tokenizer.eos_token_id
+
+    def _set_neptune_callback(self) -> None:
+        if self.config.neptune_enabled:
+            neptune_tags = [self.config.model.name]
+            if self.config.use_lora:
+                neptune_tags.append("lora")
+            if self.config.use_qlora:
+                neptune_tags.append("qlora")
+            self.neptune_callback: Optional[NeptuneCallback] = create_neptune_callback(self.config, tags=neptune_tags)
+
+    def _set_trainer(self) -> None:
+        train_params = TrainingArguments(report_to="none", **self.config.train_params)
+        torch_dtype = DTYPES_MAPPING.get(DataType(self.config.model.torch_dtype))
+
+        peft_params: Optional[LoraConfig] = None
+        qlora_params: Optional[BitsAndBytesConfig] = None
+
+        if self.config.use_lora:
+            peft_params = LoraConfig(
+                target_modules=list(self.config.model.lora_target_modules), **self.config.lora_params
+            )
+        if self.config.use_qlora:
+            qlora_params = BitsAndBytesConfig(bnb_4bit_compute_dtype=torch_dtype, **self.config.qlora_params)
+
+        model_params = {"torch_dtype": torch_dtype, "device_map": "auto", "quantization_config": qlora_params}
+
+        self._trainer = SFTTrainer(
+            model=self.config.model.name,
+            model_init_kwargs=model_params,
+            args=train_params,
+            train_dataset=self._processed_dataset["train"],
+            eval_dataset=self._processed_dataset["test"],
+            dataset_text_field=DATASET_TEXT_FIELD,
+            tokenizer=self._tokenizer,
+            packing=False,
+            peft_config=peft_params,
+            max_seq_length=self.config.model.context_length,
+            callbacks=[self.neptune_callback] if self.config.neptune_enabled else [],
+        )
+
+    def finetune(self):
+        """
+        Initiates the fine-tuning process for the model.
+        """
+        self._trainer.train()
diff --git a/setup.cfg b/setup.cfg
index 0b9683e1..0a8f69de 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -74,6 +74,19 @@ local =
     accelerate~=0.31.0
     torch~=2.2.1
    transformers~=4.41.2
+finetuning =
+    accelerate~=0.32.1
+    bitsandbytes~=0.43.1
+    datasets~=2.20.0
+    hydra-core~=1.3.2
+    loguru~=0.7.2
+    neptune~=1.10.4
+    omegaconf~=2.3.0
+    peft~=0.11.1
+    sentencepiece~=0.2.0
+    torch~=2.3.0
+    transformers~=4.42.3
+    trl~=0.9.6
 
 [options.packages.find]
 where = src
diff --git a/src/dbally/llms/clients/local.py b/src/dbally/llms/clients/local.py
index d77be3f3..f984127e 100644
--- a/src/dbally/llms/clients/local.py
+++ b/src/dbally/llms/clients/local.py
@@ -2,6 +2,7 @@
 from typing import List, Optional, Union
 
 import torch
+from peft import PeftModel
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
 from dbally.audit.events import LLMEvent
@@ -41,6 +42,7 @@ def __init__(
         self,
         model_name: str,
         *,
+        adapter_name: Optional[str] = None,
         hf_api_key: Optional[str] = None,
     ) -> None:
         """
@@ -48,6 +50,7 @@ def __init__(
 
         Args:
             model_name: Name of the model to use.
+            adapter_name: The name of the LoRA adapter, if any, used to modify the model's weights.
             hf_api_key: The Hugging Face API key for authentication.
         """
 
@@ -56,6 +59,11 @@ def __init__(
         self.model = AutoModelForCausalLM.from_pretrained(
             model_name, device_map="auto", torch_dtype=torch.bfloat16, token=hf_api_key
         )
+
+        if adapter_name:
+            self.model = PeftModel.from_pretrained(self.model, adapter_name)
+            self.model = self.model.merge_and_unload()
+
         self.tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_api_key)
 
     async def call(
diff --git a/src/dbally/llms/local.py b/src/dbally/llms/local.py
index 198513b3..48cec257 100644
--- a/src/dbally/llms/local.py
+++ b/src/dbally/llms/local.py
@@ -20,6 +20,7 @@ def __init__(
         model_name: str,
         default_options: Optional[LocalLLMOptions] = None,
         *,
+        adapter_name: Optional[str] = None,
         api_key: Optional[str] = None,
     ) -> None:
         """
@@ -28,12 +29,14 @@ def __init__(
 
         Args:
             model_name: Name of the model to use. This should be a model from the CausalLM class.
             default_options: Default options for the LLM.
+            adapter_name: The name of the LoRA adapter, if any, used to modify the model's weights.
             api_key: The API key for Hugging Face authentication.
         """
         super().__init__(model_name, default_options)
         self.tokenizer = AutoTokenizer.from_pretrained(model_name, token=api_key)
         self.api_key = api_key
+        self.adapter_name = adapter_name
 
     @cached_property
     def client(self) -> LocalLLMClient:
@@ -43,7 +46,7 @@ def client(self) -> LocalLLMClient:
         Returns:
             The client used to interact with the LLM.
         """
-        return LocalLLMClient(model_name=self.model_name, hf_api_key=self.api_key)
+        return LocalLLMClient(model_name=self.model_name, adapter_name=self.adapter_name, hf_api_key=self.api_key)
 
     def count_tokens(self, prompt: PromptTemplate) -> int:
         """