diff --git a/all_glosses/data_selection.py b/all_glosses/data_selection.py
new file mode 100644
index 0000000..d927527
--- /dev/null
+++ b/all_glosses/data_selection.py
@@ -0,0 +1,32 @@
+import os
+
+import pandas as pd
+
+
+def read(text_info, mms_info):
+    data_list = []
+    (text_directory, text_encoding) = text_info
+    print("text_directory: ", text_directory)
+    (mms_directory, mms_encoding) = mms_info
+    for filenumber in os.listdir(text_directory):
+        f = os.path.join(mms_directory, filenumber + ".mms")
+        try:
+            # na_filter=False keeps empty dom/ndom glosses as "" instead of NaN
+            df = pd.read_csv(f, na_filter=False, encoding=mms_encoding)
+        except FileNotFoundError as e:
+            print(f"WARNING: Text file exists while mms file does not, skipping: {e}")
+            continue
+
+        text_address = os.path.join(text_directory, filenumber, "gebaerdler.Text_Deutsch.annotation~")
+        with open(text_address, encoding=text_encoding) as file:
+            lines = file.readlines()
+        # The third semicolon-separated field of each annotation line is the German text
+        text_line = " ".join(line.replace("\n", "").split(";")[2] for line in lines)
+        glosses = df["maingloss"] + "_" + df["domgloss"] + "_" + df["ndomgloss"]
+        gloss_line = " ".join(glosses.tolist())
+        data_dict = {"file_ID": filenumber, "text": text_line, "gloss": gloss_line}
+        data_list.append(data_dict)
+    return data_list
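+
+# Illustrative record shape (values are hypothetical, not from the corpus):
+#   {"file_ID": "123", "text": "Zug nach ...", "gloss": "ZUG_A_B NACH_C_D ..."}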
diff --git a/all_glosses/datasets.py b/all_glosses/datasets.py
new file mode 100644
index 0000000..e9b955a
--- /dev/null
+++ b/all_glosses/datasets.py
@@ -0,0 +1,83 @@
+import torch
+from torch.utils.data import Dataset
+from transformers import AutoTokenizer
+
+from . import data_selection
+
+mms_directories = [
+    ("mms-subset91", 'latin-1'),
+    ("modified/location/mms", 'utf-8'),
+    ("modified/platform/mms", 'utf-8'),
+    ("modified/time/mms", 'utf-8'),
+    ("modified/train_name/mms", 'utf-8'),
+]
+text_directories = [
+    ("annotations_full/annotations", 'latin-1'),
+    ("modified/location/text", 'utf-8'),
+    ("modified/platform/text", 'utf-8'),
+    ("modified/time/text", 'utf-8'),
+    ("modified/train_name/text", 'utf-8'),
+]
+
+checkpoint = 'facebook/nllb-200-distilled-600M'  # NLLB checkpoint
+tokenizer = AutoTokenizer.from_pretrained(checkpoint)
+
+def read():
+    data_list_only_original = []
+    data_list_only_modified = []
+    for i, text_info in enumerate(text_directories):
+        mms_info = mms_directories[i]
+        data_list_one = data_selection.read(text_info, mms_info)
+        if i == 0:
+            data_list_only_original += data_list_one
+        else:
+            data_list_only_modified += data_list_one
+
+    data_list_full = data_list_only_original + data_list_only_modified
+
+    return (data_list_only_original, data_list_only_modified, data_list_full)
+
+
+class SignLanguageDataset(Dataset):
+    def __init__(self, data_list, tokenizer, max_length=512):
+        self.data_list = data_list
+        self.tokenizer = tokenizer
+        self.max_length = max_length
+        self.vocab_size = len(tokenizer)
+
+    def __len__(self):
+        return len(self.data_list)
+
+    def __getitem__(self, idx):
+        data = self.data_list[idx]
+        file_Id = data['file_ID']
+        text_tokens = self.tokenizer.encode(data['text'], add_special_tokens=True)
+        text_tokens = torch.tensor(text_tokens)
+
+        gloss_tokens = self.tokenizer.encode(data['gloss'].lower(), add_special_tokens=True)
+        gloss_tokens = torch.tensor(gloss_tokens)
+
+        return file_Id, text_tokens, gloss_tokens
+
+
+def collate_fn(batch):
+    file_Id, text_tokens, gloss_tokens = zip(*batch)
+    padding_value = tokenizer.pad_token_id  # for NLLB the padding token id is 1
+
+    text_tokens_padded = torch.nn.utils.rnn.pad_sequence(text_tokens, batch_first=True, padding_value=padding_value)
+    gloss_tokens_padded = torch.nn.utils.rnn.pad_sequence(gloss_tokens, batch_first=True, padding_value=padding_value)
+
+    # Ensure text and gloss batches share the same sequence length
+    max_len = max(text_tokens_padded.size(1), gloss_tokens_padded.size(1))
+
+    text_tokens_padded = torch.nn.functional.pad(text_tokens_padded, (0, max_len - text_tokens_padded.size(1)), value=padding_value)
+    gloss_tokens_padded = torch.nn.functional.pad(gloss_tokens_padded, (0, max_len - gloss_tokens_padded.size(1)), value=padding_value)
+
+    return file_Id, text_tokens_padded, gloss_tokens_padded
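+
+# Usage sketch (assumes the definitions above; shapes depend on the tokenizer):
+#   original, modified, full = read()
+#   ds = SignLanguageDataset(original, tokenizer)
+#   loader = torch.utils.data.DataLoader(ds, batch_size=8, collate_fn=collate_fn)
+#   ids, text_batch, gloss_batch = next(iter(loader))
+#   # text_batch and gloss_batch are LongTensors of shape (batch, max_len)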
diff --git a/all_glosses/nllb.py b/all_glosses/nllb.py
new file mode 100644
index 0000000..ee90cbc
--- /dev/null
+++ b/all_glosses/nllb.py
@@ -0,0 +1,287 @@
+import torch
+import numpy as np
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+import os
+from sacrebleu.metrics import BLEU
+from nltk.translate.bleu_score import sentence_bleu
+from . import datasets
+from pathlib import Path
+from sklearn.model_selection import KFold
+from torch.utils.data import DataLoader
+import time
+
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+checkpoint = 'facebook/nllb-200-distilled-600M'  # NLLB checkpoint
+tokenizer = AutoTokenizer.from_pretrained(checkpoint)
+
+def train(fold, ds, augment):
+
+    if not augment:
+        augment_dir = "original_data"
+    else:
+        augment_dir = "aug_data"
+    save_folder = os.path.join("/ds/videos/AVASAG/allgloss_tg/", augment_dir, "nllb")
+    save_file_path = os.path.join(save_folder, "result")
+    Path(save_folder).mkdir(parents=True, exist_ok=True)
+
+    (original, modified, full) = ds
+    dataset = original
+
+    # Split the dataset into 10 folds
+    kf = KFold(n_splits=10, shuffle=True, random_state=42)
+    folds = list(kf.split(dataset))
+
+    # Split the dataset into train and test sets based on the current fold
+    train_indices = folds[fold][0]
+    test_indices = folds[fold][1]
+    train_data = [dataset[idx] for idx in train_indices]
+    test_data = [dataset[idx] for idx in test_indices]
+
+    # Augment the training data if augment=True
+    if augment:
+        train_data = augment_data(train_data, modified)
+
+    train_dataset = datasets.SignLanguageDataset(train_data, tokenizer)
+    train_dataloader = DataLoader(train_dataset, batch_size=8, shuffle=True, collate_fn=datasets.collate_fn)
+
+    test_dataset = datasets.SignLanguageDataset(test_data, tokenizer)
+    test_dataloader = DataLoader(test_dataset, batch_size=1, shuffle=False, collate_fn=datasets.collate_fn)
+
+    NUM_EPOCHS = 1000
+    loss_history = []
+
+    model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint).to(device)
+    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
+
+    train_log = open(save_file_path + f"_fold_{fold}_train_log.txt", 'w')
+
+    best_epoch = 0
+
+    for epoch in range(1, NUM_EPOCHS + 1):
+        start_time = time.time()
+        train_loss = train_epoch(model, train_dataloader, optimizer, tokenizer)
+
+        end_time = time.time()
+        log = "Epoch: " + str(epoch) + ", Train loss: " + str(train_loss) + " Epoch duration " + str(end_time - start_time) + "\n"
+        train_log.write(log)
+        # Save a checkpoint whenever the training loss reaches a new minimum
+        if epoch <= 1 or train_loss < min(loss_history):
+            best_model_path = save_file_path + f"_fold_{fold}_best_model.pt"
+            torch.save(model.state_dict(), best_model_path)
+            log = "min so far is at epoch: " + str(epoch) + "\n"
+            train_log.write(log)
+            best_epoch = epoch
+
+        loss_history.append(train_loss)
+
+    log = "best epoch is: " + str(best_epoch)
+    train_log.write(log)
+    train_log.close()
+
+    torch.save(model.state_dict(), save_file_path + f"_fold_{fold}_last_model.pt")
+
+    return test_dataloader, save_file_path
+
+def extract_glosses(glosses):
+    # Each token is "MAIN_DOM_NDOM"; missing parts are simply absent
+    main_glosses, dom_glosses, ndom_glosses = [], [], []
+    for gloss in glosses.split():
+        glosses_split = gloss.split("_")
+        if len(glosses_split) > 0:
+            main_glosses.append(glosses_split[0])
+        if len(glosses_split) > 1:
+            dom_glosses.append(glosses_split[1])
+        if len(glosses_split) > 2:
+            ndom_glosses.append(glosses_split[2])
+    return " ".join(main_glosses), " ".join(dom_glosses), " ".join(ndom_glosses)
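+
+# Example with hypothetical glosses: extract_glosses("ZUG_A_B MORGEN_C")
+# returns ("ZUG MORGEN", "A C", "B").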
+
+
+def count_length_comparisons(hypotheses, ground_truths):
+    # P = predicted length, T = ground-truth length (in whitespace tokens)
+    counts = {
+        'num_P_T': sum(len(h.split()) > len(g.split()) for h, g in zip(hypotheses, ground_truths)),
+        'num_T_P': sum(len(h.split()) < len(g.split()) for h, g in zip(hypotheses, ground_truths)),
+        'num_e': sum(len(h.split()) == len(g.split()) for h, g in zip(hypotheses, ground_truths))
+    }
+    return counts
+
+def save_results(fold, model_type, save_file_path, counts, bleus, ground_truths, hypotheses):
+    with open(save_file_path + f"_fold_{fold}_{model_type}_outputs.txt", "w") as f:
+        # Write BLEU scores for each gloss type
+        f.write("BLEU Scores:\n")
+        for gloss_type, score in bleus.items():
+            f.write(f"{gloss_type}: {score}\n")
+
+        f.write("\nLength Comparison Counts:\n")
+        # Write counts for each gloss type
+        for gloss_type, count_dict in counts.items():
+            f.write(f"{gloss_type}:\n")
+            f.write(f"  P>T: {count_dict['num_P_T']}\n")
+            f.write(f"  T>P: {count_dict['num_T_P']}\n")
+            f.write(f"  Equal: {count_dict['num_e']}\n")
+
+        f.write("\nGround Truth and Predicted Texts:\n")
+        # Write ground truth and predictions for each sample
+        for i in range(len(ground_truths['maingloss'])):
+            f.write(f"\nSample {i+1}:\n")
+            f.write(f"Ground Truth (maingloss): {ground_truths['maingloss'][i]}\n")
+            f.write(f"Predicted (maingloss): {hypotheses['maingloss'][i]}\n")
+            f.write(f"Ground Truth (domgloss): {ground_truths['domgloss'][i]}\n")
+            f.write(f"Predicted (domgloss): {hypotheses['domgloss'][i]}\n")
+            f.write(f"Ground Truth (ndomgloss): {ground_truths['ndomgloss'][i]}\n")
+            f.write(f"Predicted (ndomgloss): {hypotheses['ndomgloss'][i]}\n")
+
+def calculate_bleu(hypotheses, references):
+    scores = []
+    for hyp, ref in zip(hypotheses, references):
+        ref = [ref.split()]
+        hyp = hyp.split()
+        score = sentence_bleu(ref, hyp, weights=(1, 0, 0, 0))  # BLEU-1
+        scores.append(score)
+    return sum(scores) / len(scores) if scores else 0.0  # Average BLEU-1
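+
+# Sanity check (hand-computed): calculate_bleu(["a b c"], ["a b d"]) gives 2/3,
+# since two of three unigrams match and equal lengths mean no brevity penalty.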
+
+
+def evaluate(fold, model_type, model_name, test_dataloader, save_file_path):
+    model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint).to(device)
+    model.load_state_dict(torch.load(save_file_path + f"_fold_{fold}_{model_type}_{model_name}"))
+
+    ground_truths = {
+        'maingloss': [],
+        'domgloss': [],
+        'ndomgloss': []
+    }
+    hypotheses = {
+        'maingloss': [],
+        'domgloss': [],
+        'ndomgloss': []
+    }
+
+    model.eval()
+    with torch.no_grad():
+        for batch in test_dataloader:
+            file_Id, text_tokens_padded, gloss_tokens_padded = batch
+            text_tokens_padded = text_tokens_padded.to(device)
+            gloss_tokens_padded = gloss_tokens_padded.to(device)
+
+            pred = model.generate(input_ids=text_tokens_padded, max_length=gloss_tokens_padded.size(1))
+
+            for i in range(text_tokens_padded.size(0)):
+                gt_glosses = tokenizer.decode(gloss_tokens_padded[i], skip_special_tokens=True)
+                input_text = tokenizer.decode(text_tokens_padded[i], skip_special_tokens=True)
+                text_predicted = tokenizer.decode(pred[i], skip_special_tokens=True)
+
+                main_glosses, dom_glosses, ndom_glosses = extract_glosses(gt_glosses)
+                main_glosses_pred, dom_glosses_pred, ndom_glosses_pred = extract_glosses(text_predicted)
+
+                if fold == 9:  # only for printing
+                    print("file_Id", file_Id)
+                    print(f"\nSample {len(ground_truths['maingloss']) + 1}:")
+                    print(f"Input Text: {input_text}")
+
+                    print(f"ground_truth_maingloss: {main_glosses}")
+                    print(f"ground_truth_domgloss: {dom_glosses}")
+                    print(f"ground_truth_ndomgloss: {ndom_glosses}")
+
+                    print(f"main_glosses_pred: {main_glosses_pred}")
+                    print(f"dom_glosses_pred: {dom_glosses_pred}")
+                    print(f"ndom_glosses_pred: {ndom_glosses_pred}")
+
+                ground_truths['maingloss'].append(main_glosses)
+                ground_truths['domgloss'].append(dom_glosses)
+                ground_truths['ndomgloss'].append(ndom_glosses)
+
+                hypotheses['maingloss'].append(main_glosses_pred)
+                hypotheses['domgloss'].append(dom_glosses_pred)
+                hypotheses['ndomgloss'].append(ndom_glosses_pred)
+
+    # Calculate BLEU scores: corpus-level BLEU for the main glosses,
+    # average sentence-level BLEU-1 for the sparser dom/ndom glosses
+    bleu = BLEU()
+    bleus = {
+        'maingloss': bleu.corpus_score(hypotheses['maingloss'], [ground_truths['maingloss']]),
+        'domgloss': calculate_bleu(hypotheses['domgloss'], ground_truths['domgloss']),
+        'ndomgloss': calculate_bleu(hypotheses['ndomgloss'], ground_truths['ndomgloss'])
+    }
+
+    # Count length comparisons for each gloss type
+    counts = {key: count_length_comparisons(hypotheses[key], ground_truths[key]) for key in hypotheses}
+
+    # Save results to file
+    save_results(fold, model_type, save_file_path, counts, bleus, ground_truths, hypotheses)
+
+    return bleus['maingloss'].score, bleus['domgloss'], bleus['ndomgloss']
+
+
+def augment_data(train_data, sentences):
+    augmented_train_data = train_data.copy()
+    augmented_train_data.extend(sentences)
+    return augmented_train_data
+
+def train_epoch(model, train_dataloader, optimizer, tokenizer):
+    model.train()
+    total_loss = 0
+    for batch_idx, batch in enumerate(train_dataloader):
+        file_Id, text_tokens_padded, gloss_tokens_padded = batch
+        text_tokens_padded = text_tokens_padded.to(device)
+        gloss_tokens_padded = gloss_tokens_padded.to(device)
+        input_attention_mask = (text_tokens_padded != tokenizer.pad_token_id).to(device)
+
+        optimizer.zero_grad()
+
+        output_final = model(input_ids=text_tokens_padded, attention_mask=input_attention_mask, labels=gloss_tokens_padded)
+        loss = output_final.loss
+        total_loss += loss.item()
+        loss.backward()
+        optimizer.step()
+
+    avg_train_loss = total_loss / len(train_dataloader)
+    return avg_train_loss
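+
+# Note: the labels above keep the tokenizer's pad id, so padded positions count
+# towards the loss. A possible refinement (not applied here) is to mask them:
+#   labels = gloss_tokens_padded.masked_fill(gloss_tokens_padded == tokenizer.pad_token_id, -100)
+# since Hugging Face models ignore label id -100 in the cross-entropy loss.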
+
+if __name__ == "__main__":
+    original_scores = {'best': {'maingloss': [], 'domgloss': [], 'ndomgloss': []},
+                       'last': {'maingloss': [], 'domgloss': [], 'ndomgloss': []}}
+
+    augmented_scores = {'best': {'maingloss': [], 'domgloss': [], 'ndomgloss': []},
+                        'last': {'maingloss': [], 'domgloss': [], 'ndomgloss': []}}
+
+    ds = datasets.read()
+
+    for fold in range(10):
+        print(f"Current fold {fold}:")
+        print("Original data:")
+        test_dataloader, save_file_path = train(fold, ds, augment=False)
+        print("Augmented data:")
+        test_dataloader_1, save_file_path_1 = train(fold, ds, augment=True)
+        assert save_file_path != save_file_path_1
+        for model_type in ['best', 'last']:
+            print(f"{model_type.capitalize()} model:")
+            original_maingloss, original_domgloss, original_ndomgloss = evaluate(fold, model_type, "model.pt", test_dataloader, save_file_path)
+            original_scores[model_type]['maingloss'].append(original_maingloss)
+            original_scores[model_type]['domgloss'].append(original_domgloss)
+            original_scores[model_type]['ndomgloss'].append(original_ndomgloss)
+
+            aug_maingloss, aug_domgloss, aug_ndomgloss = evaluate(fold, model_type, "model.pt", test_dataloader_1, save_file_path_1)
+            augmented_scores[model_type]['maingloss'].append(aug_maingloss)
+            augmented_scores[model_type]['domgloss'].append(aug_domgloss)
+            augmented_scores[model_type]['ndomgloss'].append(aug_ndomgloss)
+
+    avg_original_scores = {model_type: {gloss: np.mean(original_scores[model_type][gloss]) for gloss in original_scores[model_type]} for model_type in original_scores}
+    avg_augmented_scores = {model_type: {gloss: np.mean(augmented_scores[model_type][gloss]) for gloss in augmented_scores[model_type]} for model_type in augmented_scores}
+
+    for model_type in ['best', 'last']:
+        for gloss in ['maingloss', 'domgloss', 'ndomgloss']:
+            print(f" BLEU score on original data for each fold {model_type}_model {gloss}: {original_scores[model_type][gloss]}")
+            print(f" BLEU score on augmented data for each fold {model_type}_model {gloss}: {augmented_scores[model_type][gloss]}")
+            print(f" Average BLEU score on original data for {model_type}_model {gloss}: {avg_original_scores[model_type][gloss]}")
+            print(f" Average BLEU score on augmented data for {model_type}_model {gloss}: {avg_augmented_scores[model_type][gloss]}")
diff --git a/llama/data_selection.py b/llama/data_selection.py
new file mode 100644
index 0000000..6d9655b
--- /dev/null
+++ b/llama/data_selection.py
@@ -0,0 +1,87 @@
+import json
+import os
+
+import pandas as pd
+from sklearn.model_selection import train_test_split
+
+from .utils import Translation
+
+features_names = ["maingloss"]
+mms_directories = [
+    ("mms-subset91", 'latin-1'),
+    ("modified/location/mms", 'utf-8'),
+    ("modified/platform/mms", 'utf-8'),
+    ("modified/time/mms", 'utf-8'),
+    ("modified/train_name/mms", 'utf-8'),
+]
+text_directories = [
+    ("annotations_full/annotations", 'latin-1'),
+    ("modified/location/text", 'utf-8'),
+    ("modified/platform/text", 'utf-8'),
+    ("modified/time/text", 'utf-8'),
+    ("modified/train_name/text", 'utf-8'),
+]
+
+def read(text_info, mms_info, translation):
+    data_list = []
+    (text_directory, text_encoding) = text_info
+    print("text_directory: ", text_directory)
+    (mms_directory, mms_encoding) = mms_info
+    for filenumber in os.listdir(text_directory):
+        f = os.path.join(mms_directory, filenumber + ".mms")
+        try:
+            df = pd.read_csv(f, encoding=mms_encoding)
+        except FileNotFoundError as e:
+            print(f"WARNING: Text file exists while mms file does not, skipping: {e}")
+            continue
+
+        text_address = os.path.join(text_directory, filenumber, "gebaerdler.Text_Deutsch.annotation~")
+        with open(text_address, encoding=text_encoding) as file:
+            lines = file.readlines()
+        text_line = " ".join(line.replace("\n", "").split(";")[2] for line in lines)
+        for feature in features_names:  # currently only "maingloss"
+            gloss_line = " ".join(df[feature].tolist())
+            if translation == Translation.TextToGloss:
+                combined_line = f"{text_line} ###> {gloss_line}"  # text to gloss
+            elif translation == Translation.GlossToText:
+                combined_line = f"{gloss_line} ###> {text_line}"  # gloss to text
+            else:
+                raise ValueError("Invalid translation")
+            data_list.append({"text": combined_line})
+    return data_list
+
+def create_datasets(translation):
+    data_list_only_original = []
+    data_list_only_modified = []
+    for i, text_info in enumerate(text_directories):
+        mms_info = mms_directories[i]
+        data_list_one = read(text_info, mms_info, translation)
+        if i == 0:
+            data_list_only_original += data_list_one
+        else:
+            data_list_only_modified += data_list_one
+
+    data_list_full = data_list_only_original + data_list_only_modified
+
+    # 80% train, ~13% validation, ~7% test
+    train_data, temp_data = train_test_split(data_list_full, test_size=0.2, random_state=42)
+    val_data, test_data = train_test_split(temp_data, test_size=1/3, random_state=42)
+
+    if translation == Translation.TextToGloss:
+        translation_dir = "t2g_llama"
+    elif translation == Translation.GlossToText:
+        translation_dir = "g2t_llama"
+    else:
+        raise ValueError("Invalid translation")
+    with open(f"train_data_{translation_dir}.json", "w") as f:
+        json.dump(train_data, f)
+
+    with open(f"val_data_{translation_dir}.json", "w") as f:
+        json.dump(val_data, f)
+
+    with open(f"test_data_{translation_dir}.json", "w") as f:
+        json.dump(test_data, f)
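+
+# The "###>" marker separates prompt and target; a TextToGloss entry looks like
+#   {"text": "<German sentence> ###> <gloss sequence>"}  (illustrative).
+# inference.py splits on "###>" to recover the prompt and the reference.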
open(f"val_data_{translation_dir}.json", "w") as f: + json.dump(val_data, f) + + with open(f"test_data_{translation_dir}.json", "w") as f: + json.dump(test_data, f) diff --git a/llama/fine_tune.py b/llama/fine_tune.py new file mode 100644 index 0000000..da6c3c4 --- /dev/null +++ b/llama/fine_tune.py @@ -0,0 +1,148 @@ +import torch +import torch.nn as nn +import numpy as np +import transformers +from transformers import AutoTokenizer, AutoModelForCausalLM +import pickle +import os +from sacrebleu.metrics import BLEU +from .data_selection import * +from pathlib import Path +from torch.utils.data import DataLoader +import time +from enum import Enum, verify, UNIQUE +from transformers import BitsAndBytesConfig +from huggingface_hub import login +from datasets import Dataset, load_dataset +from peft import LoraConfig, PeftModel, prepare_model_for_kbit_training, get_peft_model +from trl import SFTTrainer + +hf_access_token = os.getenv("HF_ACCESS_TOKEN") +assert hf_access_token is not None, "You need to set the Hugging Face access token environment variable: export HF_ACCESS_TOKEN=hf_TODO" + +login(token = hf_access_token) + +def training(translation): + + create_datasets(translation) + + if translation == Translation.TextToGloss: + translation_dir = "t2g_llama" + elif translation == Translation.GlossToText: + translation_dir = "g2t_llama" + else: + raise ValueError("Invalid translation") + + + with open(f"train_data_{translation_dir}.json", "r") as f: + train_data = json.load(f) + + with open(f"val_data_{translation_dir}.json", "r") as f: + val_data = json.load(f) + + train_dataset = Dataset.from_list(train_data) + val_dataset = Dataset.from_list(val_data) + + device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') + torch.cuda.empty_cache() + cache_dir = "/ds/videos/AVASAG/cache" + model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct" + + tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_access_token, cache_dir=cache_dir, add_eos_token=True) + # Set padding token + tokenizer.pad_token = tokenizer.eos_token + tokenizer.padding_side = "right" + + bnb_config = BitsAndBytesConfig( + load_in_4bit=True, + bnb_4bit_use_double_quant=True, + bnb_4bit_quant_type="nf4", + bnb_4bit_compute_dtype=torch.bfloat16 + ) + + save_folder = os.path.join("/ds/videos/AVASAG/llama_finetune/", translation_dir) + sft_model_name = os.path.join(save_folder, "llama-31-it-8b-sft") + merged_model_name=os.path.join(save_folder, "llama-31-it-8b-sft-merged") + + model = AutoModelForCausalLM.from_pretrained( + model_id, device_map="auto", torch_dtype=torch.bfloat16, quantization_config=bnb_config, token=hf_access_token, cache_dir=cache_dir) + + model = prepare_model_for_kbit_training(model) + + modules = ["down_proj","up_proj","gate_proj"] + + lora_config = LoraConfig( + r=64, + lora_alpha=32, + target_modules=modules, + lora_dropout=0.05, + bias="none", + task_type="CAUSAL_LM" + ) + + model = get_peft_model(model, lora_config) + + trainable, total = model.get_nb_trainable_parameters() + print(f"Trainable: {trainable} | total: {total} | Percentage: {trainable/total*100:.4f}%") + + tokenizer.pad_token = tokenizer.eos_token + torch.cuda.empty_cache() + + trainer = SFTTrainer( + model=model, + train_dataset=train_dataset, + eval_dataset=val_dataset, + dataset_text_field="text", + peft_config=lora_config, + args=transformers.TrainingArguments( + report_to=[], # Disable logging + per_device_train_batch_size=1, + gradient_accumulation_steps=4, + warmup_ratio=0.03, + max_steps=1000, + learning_rate=2e-5, + 
+
+    model = get_peft_model(model, lora_config)
+
+    trainable, total = model.get_nb_trainable_parameters()
+    print(f"Trainable: {trainable} | total: {total} | Percentage: {trainable/total*100:.4f}%")
+
+    torch.cuda.empty_cache()
+
+    trainer = SFTTrainer(
+        model=model,
+        train_dataset=train_dataset,
+        eval_dataset=val_dataset,
+        dataset_text_field="text",
+        peft_config=lora_config,
+        args=transformers.TrainingArguments(
+            report_to=[],  # Disable logging
+            per_device_train_batch_size=1,
+            gradient_accumulation_steps=4,
+            warmup_ratio=0.03,
+            max_steps=1000,
+            learning_rate=2e-5,
+            logging_steps=1,
+            output_dir=f"/ds/videos/AVASAG/llama_finetune/outputs_{translation_dir}",
+            optim="paged_adamw_8bit",
+            save_strategy="epoch",
+            ddp_find_unused_parameters=False,
+        ),
+        data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
+    )
+    model.config.use_cache = False
+    trainer.train()
+
+    trainer.model.save_pretrained(sft_model_name)
+
+    base_model = AutoModelForCausalLM.from_pretrained(
+        model_id,
+        low_cpu_mem_usage=True,
+        return_dict=True,
+        torch_dtype=torch.float16,
+        device_map="auto",
+    )
+    merged_model = PeftModel.from_pretrained(base_model, sft_model_name)
+    merged_model = merged_model.merge_and_unload()
+
+    merged_model.save_pretrained(merged_model_name, safe_serialization=True)
+    tokenizer.save_pretrained(merged_model_name)
+
+
+if __name__ == "__main__":
+    if len(sys.argv) != 2:
+        print("Usage: python fine_tune.py [--textTogloss|--glossTotext]")
+        sys.exit(1)
+
+    if sys.argv[1] == "--textTogloss":
+        print("Translating from Text to Gloss")
+        translation = Translation.TextToGloss
+    elif sys.argv[1] == "--glossTotext":
+        print("Translating from Gloss to Text")
+        translation = Translation.GlossToText
+    else:
+        print("You have to specify either --textTogloss or --glossTotext as an argument.")
+        sys.exit(1)
+
+    training(translation)
diff --git a/llama/inference.py b/llama/inference.py
new file mode 100644
index 0000000..30216a8
--- /dev/null
+++ b/llama/inference.py
@@ -0,0 +1,112 @@
+import json
+import os
+import sys
+
+import torch
+from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
+from sacrebleu.metrics import BLEU
+
+from .utils import Translation
+
+
+def evaluation(translation):
+
+    if translation == Translation.TextToGloss:
+        translation_dir = "t2g_llama"
+    elif translation == Translation.GlossToText:
+        translation_dir = "g2t_llama"
+    else:
+        raise ValueError("Invalid translation")
+
+    folder_path = os.path.join("/ds/videos/AVASAG/llama_finetune/", translation_dir)
+    merged_model_name = os.path.join(folder_path, "llama-31-it-8b-sft-merged")
+
+    bnb_config = BitsAndBytesConfig(
+        load_in_4bit=True,
+        bnb_4bit_use_double_quant=True,
+        bnb_4bit_quant_type="nf4",
+        bnb_4bit_compute_dtype=torch.bfloat16
+    )
+
+    model_finetune = AutoModelForCausalLM.from_pretrained(
+        merged_model_name,
+        local_files_only=True,
+        quantization_config=bnb_config,
+        device_map="auto"
+    )
+    tokenizer_finetune = AutoTokenizer.from_pretrained(
+        merged_model_name,
+        local_files_only=True,
+        add_eos_token=True)
+
+    with open(f'test_data_{translation_dir}.json', 'r') as f:
+        test_data = json.load(f)
+
+    # Initialize BLEU metric
+    bleu = BLEU()
+    references = []
+    predictions = []
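+
+    # Note: max_new_tokens is capped at the reference length in the loop below,
+    # so hypotheses can never run longer than the reference; keep this in mind
+    # when comparing BLEU scores across setups.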
+
+    # Loop through the test data and generate translations
+    for entry in test_data:
+        # Extract the text before and after ###>
+        my_text = entry["text"].split("###>")[0].strip()
+        prompt = my_text + " ###>"
+        assert entry["text"].startswith(prompt), f"Prompt not found in the text: {entry['text']}"
+        reference = entry["text"].split("###>")[1].strip()
+        print("Input is:", my_text)
+        print("Ground truth is:", reference)
+
+        # Tokenize and generate the translation
+        tokenized_input = tokenizer_finetune(prompt, return_tensors="pt")
+        input_ids = tokenized_input["input_ids"].cuda()
+        attention_mask = tokenized_input["attention_mask"].cuda()
+        reference_length = len(tokenizer_finetune(reference)["input_ids"])  # number of tokens in the reference
+
+        # Generate the translation using the model
+        generation_output = model_finetune.generate(
+            input_ids=input_ids,
+            attention_mask=attention_mask,
+            num_beams=6,
+            return_dict_in_generate=True,
+            output_scores=True,
+            max_new_tokens=reference_length
+        )
+
+        # Decode the generated output
+        for seq in generation_output.sequences:
+            output = tokenizer_finetune.decode(seq, skip_special_tokens=True).split("###>")[1].strip()
+            predictions.append(output)
+            print("Generated output:", output)
+            print("\n")
+
+        # Append the reference to the references list
+        references.append([reference])
+
+    # Calculate BLEU score
+    bleu_score = bleu.corpus_score(predictions, references)
+
+    # Print the BLEU score
+    print(f"BLEU Score: {bleu_score.score}")
+
+
+if __name__ == "__main__":
+    if len(sys.argv) != 2:
+        print("Usage: python inference.py [--textTogloss|--glossTotext]")
+        sys.exit(1)
+
+    if sys.argv[1] == "--textTogloss":
+        print("Translating from Text to Gloss")
+        translation = Translation.TextToGloss
+    elif sys.argv[1] == "--glossTotext":
+        print("Translating from Gloss to Text")
+        translation = Translation.GlossToText
+    else:
+        print("You have to specify either --textTogloss or --glossTotext as an argument.")
+        sys.exit(1)
+
+    evaluation(translation)