diff --git a/docs/how-to/llms/local.md b/docs/how-to/llms/local.md
index fec271e7..7eb1d487 100644
--- a/docs/how-to/llms/local.md
+++ b/docs/how-to/llms/local.md
@@ -17,7 +17,7 @@ First, set up your environment to use a Hugging Face model.
 ```python
 import os
 
-from dbally.llms.localllm import LocalLLM
+from dbally.llms.local import LocalLLM
 
 os.environ["HUGGINGFACE_API_KEY"] = "your-api-key"
 
@@ -34,6 +34,8 @@ response = await my_collection.ask("Which LLM should I use?")
 
 ## Advanced Usage
 
+### Customizing LLM options
+
 For advanced users, you can customize your LLM using [`LocalLLMOptions`](../../reference/llms/local.md#dbally.llms.clients.local.LocalLLMOptions). Here is a list of available parameters:
 
 - `repetition_penalty`: *float or null (optional)* - Penalizes repeated tokens to avoid repetitions.
@@ -48,7 +50,7 @@ For advanced users, you can customize your LLM using [`LocalLLMOptions`](../../r
 ```python
 import dbally
 
-from dbally.llms.clients.localllm import LocalLLMOptions
+from dbally.llms.clients.local import LocalLLMOptions
 
 llm = LocalLLM("meta-llama/Meta-Llama-3-8B-Instruct", default_options=LocalLLMOptions(temperature=0.7))
 my_collection = dbally.create_collection("my_collection", llm)
@@ -63,4 +65,21 @@ response = await my_collection.ask(
         temperature=0.65,
     ),
 )
+```
+
+### Using LoRA Adapters
+
+To use a LoRA adapter with `LocalLLM`, specify the `adapter_name` parameter when creating the instance. It can be either the model id of a PEFT configuration hosted inside a model repo on the Hugging Face Hub, or a path to a directory containing a PEFT configuration file.
+
+```python
+
+import os
+from dbally.llms.local import LocalLLM
+
+os.environ["HUGGINGFACE_API_KEY"] = "your-api-key"
+
+llm = LocalLLM(
+    model_name="meta-llama/Meta-Llama-3-8B-Instruct",
+    adapter_name="path/to/your/adapter"
+)
 ```
\ No newline at end of file
diff --git a/finetuning/README.md b/finetuning/README.md
new file mode 100644
index 00000000..84f70ccd
--- /dev/null
+++ b/finetuning/README.md
@@ -0,0 +1,80 @@
+# How-To: Fine-tune IQL LLM
+
+This section provides a step-by-step guide to fine-tuning an IQL LLM.
+
+## Prerequisites
+
+Before you start, install the required dependencies for fine-tuning LLMs.
+
+```bash
+pip install dbally[finetuning]
+```
+
+## Customizing the fine-tuning
+
+You can customize various aspects of the fine-tuning process by modifying the config files stored in `finetuning/dbally_finetuning/configs`.
+
+Here is an example structure of the `config.yaml` file:
+```yaml
+name:
+defaults:
+  - model:
+  - train_params:
+  - lora_params:
+  - qlora_params:
+  - _self_
+
+dataset:
+
+use_lora:
+use_qlora:
+
+output_dir:
+seed:
+env_file_path:
+overwrite_output_dir:
+neptune_enabled:
+```
+
+The key sections you might want to adjust are `model`, `train_params`, `lora_params`, and `qlora_params`.
+
+### Training parameters (`train_params`)
+
+The `train_params` section should correspond to [`TrainingArguments`](https://huggingface.co/docs/transformers/en/main_classes/trainer#transformers.TrainingArguments). These parameters control the training process, including learning rate, batch size, number of epochs, and more.
+
+### LoRA parameters (`lora_params`)
+
+The `lora_params` section should correspond to [`PeftConfig`](https://huggingface.co/docs/peft/en/package_reference/config#peft.PeftConfig). These parameters control the Low-Rank Adaptation (LoRA) configuration, which helps in fine-tuning large language models efficiently.
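+
+For reference, the baseline LoRA configuration shipped with this PR in `finetuning/dbally_finetuning/configs/lora_params/baseline.yaml` looks like this and can serve as a starting point for your own experiments:
+
+```yaml
+r: 64
+lora_alpha: 16
+lora_dropout: 0.1
+bias: "none"
+task_type: "CAUSAL_LM"
+```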
+
+### QLoRA parameters (`qlora_params`)
+
+The `qlora_params` section should correspond to [`BitsAndBytesConfig`](https://huggingface.co/docs/transformers/main_classes/quantization#transformers.BitsAndBytesConfig). These parameters control the Quantized LoRA (QLoRA) configuration, which allows for training and inference with quantized weights, reducing memory usage and computational requirements.
+
+### Model configuration (`model`)
+This section defines the model architecture and related parameters. Key elements to include are:
+
+- `name`: The name or path of the pre-trained model to fine-tune, such as "meta-llama/Meta-Llama-3-8B-Instruct".
+- `lora_target_modules`: List of model modules to which LoRA will be applied, for example, ["q_proj", "k_proj", "v_proj", "o_proj"].
+- `torch_dtype`: Data type for model parameters during training, such as bfloat16.
+- `context_length`: Maximum context length for the model, e.g., 2048.
+
+## Using Neptune for Experiment Tracking
+
+[Neptune](https://neptune.ai/) helps you track and log your experiment metrics, parameters, and other metadata in a centralized location. To enable experiment tracking with Neptune, you need to configure the necessary environment variables.
+
+Ensure you have the following environment variables set:
+
+- `NEPTUNE_API_TOKEN`: Your Neptune API token.
+- `NEPTUNE_PROJECT`: The name of your Neptune project.
+
+You can set these variables in your environment or load them from a `.env` file.
+
+## Running the Script
+
+Execute the script from the repository root directory using the following command:
+
+```bash
+PYTHONPATH=finetuning python finetuning/dbally_finetuning/train.py
+```
+
+This command runs the fine-tuning process with the specified configuration, and the outputs will be written to a timestamped subdirectory of the specified `output_dir`.
diff --git a/finetuning/dbally_finetuning/__init__.py b/finetuning/dbally_finetuning/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/finetuning/dbally_finetuning/callbacks/neptune_callback.py b/finetuning/dbally_finetuning/callbacks/neptune_callback.py
new file mode 100644
index 00000000..a811b5e9
--- /dev/null
+++ b/finetuning/dbally_finetuning/callbacks/neptune_callback.py
@@ -0,0 +1,46 @@
+import os
+from typing import List, Optional, Tuple
+
+from loguru import logger
+from omegaconf import DictConfig
+from transformers.integrations import NeptuneCallback
+
+
+def get_neptune_token_and_project_set() -> Tuple[Optional[str], Optional[str]]:
+    """
+    Loads the Neptune token and project name from environment variables.
+
+    Returns:
+        Neptune token and project values.
+    """
+
+    neptune_token = os.getenv("NEPTUNE_API_TOKEN")
+    neptune_project_name = os.getenv("NEPTUNE_PROJECT")
+
+    if neptune_token is None:
+        logger.info("neptune token not found")
+
+    if neptune_project_name is None:
+        logger.info("neptune project name not found")
+
+    return neptune_token, neptune_project_name
+
+
+def create_neptune_callback(config: DictConfig, tags: Optional[List[str]] = None) -> Optional[NeptuneCallback]:
+    """
+    Args:
+        config: DictConfig with experiment configuration.
+        tags: Optional tags to be stored in experiments metadata.
+
+    Returns:
+        Neptune callback, or None if the Neptune environment variables are not set.
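+
+    Example (illustrative; mirrors how the callback is created in IQLTrainer):
+        >>> callback = create_neptune_callback(config, tags=["meta-llama/Meta-Llama-3-8B-Instruct", "lora"])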
+    """
+
+    neptune_token, neptune_project_name = get_neptune_token_and_project_set()
+
+    if neptune_token is not None and neptune_project_name is not None:
+        neptune_callback = NeptuneCallback(project=neptune_project_name, api_token=neptune_token, tags=tags)
+        neptune_callback.config = config
+        return neptune_callback
+    logger.warning("Neptune environment variables not set properly. Neptune won't be used for this experiment.")
+    return None
diff --git a/finetuning/dbally_finetuning/configs/config.yaml b/finetuning/dbally_finetuning/configs/config.yaml
new file mode 100644
index 00000000..f78a781c
--- /dev/null
+++ b/finetuning/dbally_finetuning/configs/config.yaml
@@ -0,0 +1,18 @@
+name:
+defaults:
+  - model: llama-3-8b-instruct
+  - train_params: baseline
+  - lora_params: baseline
+  - qlora_params: baseline
+  - _self_
+
+dataset: dsai-alicja-kotyla/text-to-iql-v2
+
+use_lora: true
+use_qlora: true
+
+output_dir: reports
+seed: 1234
+env_file_path: "x.env"
+overwrite_output_dir: true
+neptune_enabled: false
diff --git a/finetuning/dbally_finetuning/configs/lora_params/baseline.yaml b/finetuning/dbally_finetuning/configs/lora_params/baseline.yaml
new file mode 100644
index 00000000..b127677d
--- /dev/null
+++ b/finetuning/dbally_finetuning/configs/lora_params/baseline.yaml
@@ -0,0 +1,5 @@
+r: 64
+lora_alpha: 16
+lora_dropout: 0.1
+bias: "none"
+task_type: "CAUSAL_LM"
\ No newline at end of file
diff --git a/finetuning/dbally_finetuning/configs/model/llama-3-8b-instruct.yaml b/finetuning/dbally_finetuning/configs/model/llama-3-8b-instruct.yaml
new file mode 100644
index 00000000..b8d1d9fe
--- /dev/null
+++ b/finetuning/dbally_finetuning/configs/model/llama-3-8b-instruct.yaml
@@ -0,0 +1,4 @@
+name: meta-llama/Meta-Llama-3-8B-Instruct
+lora_target_modules: ["q_proj", "k_proj", "v_proj", "o_proj"]
+torch_dtype: bfloat16
+context_length: 2048
diff --git a/finetuning/dbally_finetuning/configs/qlora_params/baseline.yaml b/finetuning/dbally_finetuning/configs/qlora_params/baseline.yaml
new file mode 100644
index 00000000..221e6567
--- /dev/null
+++ b/finetuning/dbally_finetuning/configs/qlora_params/baseline.yaml
@@ -0,0 +1,3 @@
+load_in_4bit: true
+bnb_4bit_use_double_quant: true
+bnb_4bit_quant_type: "nf4"
\ No newline at end of file
diff --git a/finetuning/dbally_finetuning/configs/train_params/baseline.yaml b/finetuning/dbally_finetuning/configs/train_params/baseline.yaml
new file mode 100644
index 00000000..eed3cc50
--- /dev/null
+++ b/finetuning/dbally_finetuning/configs/train_params/baseline.yaml
@@ -0,0 +1,20 @@
+learning_rate: 2e-05
+per_device_train_batch_size: 8
+gradient_accumulation_steps: 1
+num_train_epochs: 2
+lr_scheduler_type: "cosine"
+logging_steps: 10
+bf16: true
+fp16: false
+gradient_checkpointing: true
+logging_strategy: "steps"
+max_steps: -1
+output_dir: "output"
+seed: 42
+warmup_steps: 24
+save_strategy: "epoch"
+save_total_limit: -1
+do_eval: true
+evaluation_strategy: "steps"
+eval_steps: 40
+per_device_eval_batch_size: 8
\ No newline at end of file
diff --git a/finetuning/dbally_finetuning/constants.py b/finetuning/dbally_finetuning/constants.py
new file mode 100644
index 00000000..4efe4da4
--- /dev/null
+++ b/finetuning/dbally_finetuning/constants.py
@@ -0,0 +1,23 @@
+import enum
+from typing import Dict
+
+import torch
+
+
+class DataType(enum.Enum):
+    """
+    Enum representing the torch.dtype used to load Hugging Face models.
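+
+    Example (illustrative):
+        >>> DTYPES_MAPPING[DataType("bfloat16")]
+        torch.bfloat16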
+    """
+
+    FLOAT16 = "float16"
+    FLOAT32 = "float32"
+    BFLOAT16 = "bfloat16"
+
+
+DTYPES_MAPPING: Dict[DataType, torch.dtype] = {
+    DataType.FLOAT16: torch.float16,
+    DataType.FLOAT32: torch.float32,
+    DataType.BFLOAT16: torch.bfloat16,
+}
+
+DATASET_TEXT_FIELD = "messages"
diff --git a/finetuning/dbally_finetuning/paths.py b/finetuning/dbally_finetuning/paths.py
new file mode 100644
index 00000000..53e2ede2
--- /dev/null
+++ b/finetuning/dbally_finetuning/paths.py
@@ -0,0 +1,7 @@
+"""Module to store useful paths."""
+from pathlib import Path
+
+import dbally_finetuning
+
+PATH_SRC = Path(dbally_finetuning.__file__).parents[0]
+PATH_CONFIG = PATH_SRC / "configs"
diff --git a/finetuning/dbally_finetuning/preprocessing/preprocessor.py b/finetuning/dbally_finetuning/preprocessing/preprocessor.py
new file mode 100644
index 00000000..ddbf5810
--- /dev/null
+++ b/finetuning/dbally_finetuning/preprocessing/preprocessor.py
@@ -0,0 +1,49 @@
+from typing import Optional
+
+from datasets import Dataset
+from dbally_finetuning.constants import DATASET_TEXT_FIELD
+from dbally_finetuning.prompt import IQL_GENERATION_TEMPLATE, IQLGenerationPromptFormat
+from transformers import PreTrainedTokenizer
+
+from dbally.prompt.template import PromptTemplate
+
+
+class Preprocessor:
+    """Preprocessor that formats IQL dataset examples into chat-templated text."""
+
+    def __init__(
+        self, tokenizer: PreTrainedTokenizer, prompt_template: Optional[PromptTemplate[IQLGenerationPromptFormat]]
+    ):
+        self.tokenizer: PreTrainedTokenizer = tokenizer
+        self._prompt_template = prompt_template or IQL_GENERATION_TEMPLATE
+
+    def _process_example(self, example: dict):
+        prompt_format = IQLGenerationPromptFormat(
+            question=example["question"],
+            iql_context=example["iql_context"],
+            iql=example["iql"],
+        )
+        formatted_prompt = self._prompt_template.format_prompt(prompt_format)
+
+        return formatted_prompt.chat
+
+    def process(
+        self,
+        dataset: Dataset,
+    ) -> Dataset:
+        """
+        Returns the dataset with the chat-template-formatted input for the model.
+
+        Args:
+            dataset: Dataset to process.
+
+        Returns:
+            Processed dataset.
+        """
+
+        processed_input = [self._process_example(example) for example in dataset]
+
+        processed_input = self.tokenizer.apply_chat_template(
+            processed_input, tokenize=False, add_generation_prompt=False
+        )
+        return Dataset.from_dict({DATASET_TEXT_FIELD: processed_input})
diff --git a/finetuning/dbally_finetuning/prompt.py b/finetuning/dbally_finetuning/prompt.py
new file mode 100644
index 00000000..7d0526a9
--- /dev/null
+++ b/finetuning/dbally_finetuning/prompt.py
@@ -0,0 +1,54 @@
+# pylint: disable=C0301
+
+from typing import List
+
+from dbally.prompt.template import PromptFormat, PromptTemplate
+
+
+class IQLGenerationPromptFormat(PromptFormat):
+    """
+    IQL generation prompt format, providing a question, IQL context, and target IQL to be used in the conversation.
+    """
+
+    def __init__(
+        self,
+        *,
+        question: str,
+        iql: str,
+        iql_context: List[str],
+    ) -> None:
+        """
+        Constructs a new IQLGenerationPromptFormat instance.
+
+        Args:
+            question: Question to be asked.
+            iql: Target IQL query for the question.
+            iql_context: List of IQL context entries (e.g. filters or actions) to be used in the prompt.
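+
+        Example (illustrative values):
+            >>> prompt_format = IQLGenerationPromptFormat(
+            ...     question="Which LLM should I use?",
+            ...     iql='filter_by_name("Llama")',
+            ...     iql_context=["filter_by_name(name: str) - filters LLMs by name"],
+            ... )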
+        """
+        super().__init__()
+        self.question = question
+        self.iql_context = "\n".join([str(iql_context) for iql_context in iql_context])
+        self.iql = iql
+
+
+IQL_GENERATION_TEMPLATE = PromptTemplate[IQLGenerationPromptFormat](
+    [
+        {
+            "role": "system",
+            "content": "You have access to API that lets you query a database:\n"
+            "\n{iql_context}\n"
+            "Please suggest which one(s) to call and how they should be joined with logic operators (AND, OR, NOT).\n"
+            "Remember! Don't give any comments, just the function calls.\n"
+            "The output will look like this:\n"
+            'filter1("arg1") AND (NOT filter2(120) OR filter3(True))\n'
+            "DO NOT INCLUDE arguments names in your response. Only the values.\n"
+            "You MUST use only these methods:\n"
+            "\n{iql_context}\n"
+            "It is VERY IMPORTANT not to use methods other than those listed above."
+            "If you DON'T KNOW HOW TO ANSWER DON'T SAY \"\", SAY: `UNSUPPORTED QUERY` INSTEAD! "
+            "This is CRUCIAL, otherwise the system will crash.",
+        },
+        {"role": "user", "content": "{question}"},
+        {"role": "assistant", "content": "{iql}"},
+    ]
+)
diff --git a/finetuning/dbally_finetuning/train.py b/finetuning/dbally_finetuning/train.py
new file mode 100644
index 00000000..2196616d
--- /dev/null
+++ b/finetuning/dbally_finetuning/train.py
@@ -0,0 +1,20 @@
+# pylint: disable=C0116
+import os
+from datetime import datetime
+
+import hydra
+from dbally_finetuning.paths import PATH_CONFIG
+from dbally_finetuning.trainer.iql_trainer import IQLTrainer
+
+
+@hydra.main(config_name="config", config_path=str(PATH_CONFIG), version_base=None)
+def main(config):
+    output_dir = os.path.join(config.output_dir, datetime.now().strftime("%Y-%m-%d_%H-%M-%S"))
+    os.makedirs(output_dir, exist_ok=True)
+
+    iql_trainer = IQLTrainer(config, output_dir)
+    iql_trainer.finetune()
+
+
+if __name__ == "__main__":
+    main()  # pylint: disable=E1120
diff --git a/finetuning/dbally_finetuning/trainer/iql_trainer.py b/finetuning/dbally_finetuning/trainer/iql_trainer.py
new file mode 100644
index 00000000..5503f390
--- /dev/null
+++ b/finetuning/dbally_finetuning/trainer/iql_trainer.py
@@ -0,0 +1,96 @@
+from typing import Dict, Optional
+
+from datasets import Dataset, load_dataset
+from dbally_finetuning.callbacks.neptune_callback import create_neptune_callback
+from dbally_finetuning.constants import DATASET_TEXT_FIELD, DTYPES_MAPPING, DataType
+from dbally_finetuning.preprocessing.preprocessor import Preprocessor
+from dbally_finetuning.prompt import IQL_GENERATION_TEMPLATE
+from dotenv import load_dotenv
+from omegaconf import DictConfig
+from peft import LoraConfig
+from transformers import AutoTokenizer, BitsAndBytesConfig, PreTrainedTokenizer, TrainingArguments
+from transformers.integrations import NeptuneCallback
+from trl import SFTTrainer
+
+
+class IQLTrainer:
+    """
+    IQLTrainer is responsible for setting up and managing the fine-tuning of an IQL causal language model.
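+
+    Example (mirrors the usage in train.py; `config` is a Hydra DictConfig):
+        >>> trainer = IQLTrainer(config, output_dir)
+        >>> trainer.finetune()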
+    """
+
+    def __init__(self, config: DictConfig, output_dir: str):
+        self.config: DictConfig = config
+        self.output_dir = output_dir
+        load_dotenv(config.env_file_path)
+
+        self._set_tokenizer()
+        self._set_processor()
+
+        self._load_dataset()
+        self._prepare_dataset()
+
+        self._set_neptune_callback()
+
+        self._set_trainer()
+
+    def _load_dataset(self) -> None:
+        self._dataset = load_dataset(self.config.dataset)
+
+    def _prepare_dataset(self) -> None:
+        self._processed_dataset: Dict[str, Dataset] = {}
+        for split in self._dataset:
+            self._processed_dataset[split] = self._processor.process(self._dataset[split])
+
+    def _set_processor(self) -> None:
+        self._processor: Preprocessor = Preprocessor(self._tokenizer, IQL_GENERATION_TEMPLATE)
+
+    def _set_tokenizer(self) -> None:
+        self._tokenizer: PreTrainedTokenizer = AutoTokenizer.from_pretrained(self.config.model.name)
+        self._tokenizer.model_max_length = self.config.model.context_length
+        if self._tokenizer.pad_token_id is None:
+            self._tokenizer.pad_token_id = self._tokenizer.eos_token_id
+
+    def _set_neptune_callback(self) -> None:
+        if self.config.neptune_enabled:
+            neptune_tags = [self.config.model.name]
+            if self.config.use_lora:
+                neptune_tags.append("lora")
+            if self.config.use_qlora:
+                neptune_tags.append("qlora")
+            self.neptune_callback: Optional[NeptuneCallback] = create_neptune_callback(self.config, tags=neptune_tags)
+
+    def _set_trainer(self) -> None:
+        train_params = TrainingArguments(report_to="none", **self.config.train_params)
+        torch_dtype = DTYPES_MAPPING.get(DataType(self.config.model.torch_dtype))
+
+        peft_params: Optional[LoraConfig] = None
+        qlora_params: Optional[BitsAndBytesConfig] = None
+
+        if self.config.use_lora:
+            peft_params = LoraConfig(
+                target_modules=list(self.config.model.lora_target_modules), **self.config.lora_params
+            )
+        if self.config.use_qlora:
+            qlora_params = BitsAndBytesConfig(bnb_4bit_compute_dtype=torch_dtype, **self.config.qlora_params)
+
+        model_params = {"torch_dtype": torch_dtype, "device_map": "auto", "quantization_config": qlora_params}
+
+        self._trainer = SFTTrainer(
+            model=self.config.model.name,
+            model_init_kwargs=model_params,
+            args=train_params,
+            train_dataset=self._processed_dataset["train"],
+            eval_dataset=self._processed_dataset["test"],
+            dataset_text_field=DATASET_TEXT_FIELD,
+            tokenizer=self._tokenizer,
+            packing=False,
+            peft_config=peft_params,
+            max_seq_length=self.config.model.context_length,
+            callbacks=[self.neptune_callback] if self.config.neptune_enabled else [],
+        )
+
+    def finetune(self):
+        """
+        Initiates the fine-tuning process for the model.
+        """
+        self._trainer.train()
diff --git a/setup.cfg b/setup.cfg
index 0b9683e1..0a8f69de 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -74,6 +74,19 @@ local =
     accelerate~=0.31.0
     torch~=2.2.1
    transformers~=4.41.2
+finetuning =
+    accelerate~=0.32.1
+    bitsandbytes~=0.43.1
+    datasets~=2.20.0
+    hydra-core~=1.3.2
+    loguru~=0.7.2
+    neptune~=1.10.4
+    omegaconf~=2.3.0
+    peft~=0.11.1
+    sentencepiece~=0.2.0
+    torch~=2.3.0
+    transformers~=4.42.3
+    trl~=0.9.6
 
 [options.packages.find]
 where = src
diff --git a/src/dbally/llms/clients/local.py b/src/dbally/llms/clients/local.py
index d77be3f3..f984127e 100644
--- a/src/dbally/llms/clients/local.py
+++ b/src/dbally/llms/clients/local.py
@@ -2,6 +2,7 @@
 from typing import List, Optional, Union
 
 import torch
+from peft import PeftModel
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
 from dbally.audit.events import LLMEvent
@@ -41,6 +42,7 @@ def __init__(
         self,
         model_name: str,
         *,
+        adapter_name: Optional[str] = None,
         hf_api_key: Optional[str] = None,
     ) -> None:
         """
@@ -48,6 +50,7 @@ def __init__(
 
         Args:
             model_name: Name of the model to use.
+            adapter_name: The name of the LoRA adapter, if any, used to modify the model's weights.
             hf_api_key: The Hugging Face API key for authentication.
         """
 
@@ -56,6 +59,11 @@ def __init__(
         self.model = AutoModelForCausalLM.from_pretrained(
             model_name, device_map="auto", torch_dtype=torch.bfloat16, token=hf_api_key
         )
+
+        if adapter_name:
+            self.model = PeftModel.from_pretrained(self.model, adapter_name)
+            self.model = self.model.merge_and_unload()
+
         self.tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_api_key)
 
     async def call(
diff --git a/src/dbally/llms/local.py b/src/dbally/llms/local.py
index 198513b3..48cec257 100644
--- a/src/dbally/llms/local.py
+++ b/src/dbally/llms/local.py
@@ -20,6 +20,7 @@ def __init__(
         model_name: str,
         default_options: Optional[LocalLLMOptions] = None,
         *,
+        adapter_name: Optional[str] = None,
         api_key: Optional[str] = None,
     ) -> None:
         """
@@ -28,12 +29,14 @@ def __init__(
 
         Args:
             model_name: Name of the model to use. This should be a model from the CausalLM class.
             default_options: Default options for the LLM.
+            adapter_name: The name of the LoRA adapter, if any, used to modify the model's weights.
             api_key: The API key for Hugging Face authentication.
         """
         super().__init__(model_name, default_options)
         self.tokenizer = AutoTokenizer.from_pretrained(model_name, token=api_key)
         self.api_key = api_key
+        self.adapter_name = adapter_name
 
     @cached_property
     def client(self) -> LocalLLMClient:
@@ -43,7 +46,7 @@ def client(self) -> LocalLLMClient:
         Returns:
             The client used to interact with the LLM.
         """
-        return LocalLLMClient(model_name=self.model_name, hf_api_key=self.api_key)
+        return LocalLLMClient(model_name=self.model_name, adapter_name=self.adapter_name, hf_api_key=self.api_key)
 
     def count_tokens(self, prompt: PromptTemplate) -> int:
         """