epfml · milind899 · Dec 6, 2025 · Dec 6, 2025
diff --git a/.gitignore b/.gitignore
@@ -14,6 +14,16 @@ muss
 *access
 *SBU
 
+# Debug/output files
+output*.txt
+error_log.txt
+install_log.txt
+*.log
+
+# IDE settings
+.vscode/
+.idea/
+
 *sub_captions
 # C extensions
 *.so

diff --git a/.vscode/settings.json b/.vscode/settings.json
diff --git a/SimSum/Bart_baseline_finetuned.py b/SimSum/Bart_baseline_finetuned.py
@@ -29,8 +29,8 @@
 from torch.utils.data import Dataset, DataLoader
 import pytorch_lightning as pl
 from pytorch_lightning.trainer import seed_everything
+from torch.optim import AdamW
 from transformers import (
-    AdamW,
     T5ForConditionalGeneration,
     T5TokenizerFast,
     BertTokenizer, BertForPreTraining,
@@ -215,7 +215,7 @@ def train_dataloader(self):
                                 drop_last=True,
                                 shuffle=True,
                                 pin_memory=True,
-                                num_workers=4)
+                                num_workers=0)
         t_total = ((len(dataloader.dataset) // (self.args.train_batch_size * max(1, self.args.n_gpu)))
                    // self.args.gradient_accumulation_steps
                    * float(self.args.num_train_epochs)
@@ -233,7 +233,7 @@ def val_dataloader(self):
                                  sample_size=self.args.valid_sample_size)
         return DataLoader(val_dataset,
                           batch_size=self.args.valid_batch_size,
-                          num_workers=4)
+                          num_workers=0)
     @staticmethod
     def add_model_specific_args(parent_parser):
       p = ArgumentParser(parents=[parent_parser],add_help = False)

diff --git a/SimSum/preprocessor.py b/SimSum/preprocessor.py
@@ -65,7 +65,7 @@ def tokenize(sentence):
 def write_lines(lines, filepath):
     filepath = Path(filepath)
     filepath.parent.mkdir(parents=True, exist_ok=True)
-    with filepath.open("w") as fout:
+    with filepath.open("w", encoding='utf-8') as fout:
         for line in lines:
             fout.write(line + '\n')
 
@@ -76,29 +76,29 @@ def read_lines(filepath):
 
 def yield_lines(filepath):
     filepath = Path(filepath)
-    with filepath.open('r') as f:
+    with filepath.open('r', encoding='utf-8') as f:
         for line in f:
             yield line.rstrip()
 
 
 def yield_sentence_pair_with_index(filepath1, filepath2):
     index = 0
-    with Path(filepath1).open('r') as f1, Path(filepath2).open('r') as f2:
+    with Path(filepath1).open('r', encoding='utf-8') as f1, Path(filepath2).open('r', encoding='utf-8') as f2:
         for line1, line2 in zip(f1, f2):
             index += 1
             yield (line1.rstrip(), line2.rstrip(), index)
 
 
 def yield_sentence_pair(filepath1, filepath2):
-    with Path(filepath1).open('r') as f1, Path(filepath2).open('r') as f2:
+    with Path(filepath1).open('r', encoding='utf-8') as f1, Path(filepath2).open('r', encoding='utf-8') as f2:
         for line1, line2 in zip(f1, f2):
             yield line1.rstrip(), line2.rstrip()
 
 
 def count_line(filepath):
     filepath = Path(filepath)
     line_count = 0
-    with filepath.open("r") as f:
+    with filepath.open("r", encoding='utf-8') as f:
         for line in f:
             line_count += 1
     return line_count

diff --git a/env.yml b/env.yml
diff --git a/requirements.txt b/requirements.txt
@@ -1,26 +1,26 @@
-bert_score==0.3.11
-click==8.0.4
-keybert==0.7.0
-matplotlib==3.5.2
-nltk==3.7
-numpy==1.23.1
-optuna==2.10.1
-pandas==1.4.3
-plotly==5.14.1
-python_Levenshtein==0.21.0
-pytorch_lightning==1.7.0
-rouge==1.0.1
-sacrebleu==2.2.0
-sacremoses==0.0.53
-scikit_learn==1.2.2
-simalign==0.3
-spacy==3.4.1
-stanfordnlp==0.2.0
-summarizer==0.0.7
-torch==1.12.1
-torchfile==0.1.0
-tqdm==4.64.0
-transformers==4.21.1
-tupa==1.4.2
-ucca==1.3.11
-yattag==1.15.1
+bert_score>=0.3.11
+click>=8.0.4
+keybert>=0.7.0
+matplotlib>=3.5.2
+nltk>=3.7
+numpy>=1.23.1
+optuna>=2.10.1
+pandas>=1.4.3
+plotly>=5.14.1
+python_Levenshtein>=0.21.0
+pytorch_lightning>=1.7.0
+rouge>=1.0.1
+sacrebleu>=2.2.0
+sacremoses>=0.0.53
+scikit_learn>=1.2.2
+simalign>=0.3
+spacy>=3.4.1
+stanfordnlp>=0.2.0
+summarizer>=0.0.7
+torch>=1.12.1
+torchfile>=0.1.0
+tqdm>=4.64.0
+transformers>=4.21.1
+tupa>=1.4.2
+ucca>=1.3.11
+yattag>=1.15.1