Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,16 @@ muss
*access
*SBU

# Debug/output files
output*.txt
error_log.txt
install_log.txt
*.log

# IDE settings
.vscode/
.idea/

*sub_captions
# C extensions
*.so
Expand Down
3 changes: 0 additions & 3 deletions .vscode/settings.json

This file was deleted.

6 changes: 3 additions & 3 deletions SimSum/Bart_baseline_finetuned.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@
from torch.utils.data import Dataset, DataLoader
import pytorch_lightning as pl
from pytorch_lightning.trainer import seed_everything
from torch.optim import AdamW
from transformers import (
AdamW,
T5ForConditionalGeneration,
T5TokenizerFast,
BertTokenizer, BertForPreTraining,
Expand Down Expand Up @@ -215,7 +215,7 @@ def train_dataloader(self):
drop_last=True,
shuffle=True,
pin_memory=True,
num_workers=4)
num_workers=0)
t_total = ((len(dataloader.dataset) // (self.args.train_batch_size * max(1, self.args.n_gpu)))
// self.args.gradient_accumulation_steps
* float(self.args.num_train_epochs)
Expand All @@ -233,7 +233,7 @@ def val_dataloader(self):
sample_size=self.args.valid_sample_size)
return DataLoader(val_dataset,
batch_size=self.args.valid_batch_size,
num_workers=4)
num_workers=0)
@staticmethod
def add_model_specific_args(parent_parser):
p = ArgumentParser(parents=[parent_parser],add_help = False)
Expand Down
10 changes: 5 additions & 5 deletions SimSum/preprocessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ def tokenize(sentence):
def write_lines(lines, filepath):
    """Write every item of *lines* to *filepath*, one per line, as UTF-8 text.

    Missing parent directories are created first and an existing file is
    overwritten. Each item must be a ``str``; a newline is appended to it.

    Args:
        lines: Iterable of strings to write.
        filepath: Destination path (``str`` or ``Path``).
    """
    filepath = Path(filepath)
    filepath.parent.mkdir(parents=True, exist_ok=True)
    # Open exactly once with an explicit encoding: the locale default is not
    # UTF-8 on every platform, which breaks non-ASCII text.
    with filepath.open("w", encoding="utf-8") as fout:
        fout.writelines(line + "\n" for line in lines)

Expand All @@ -76,29 +76,29 @@ def read_lines(filepath):

def yield_lines(filepath):
    """Lazily yield the lines of the UTF-8 text file at *filepath*.

    Trailing whitespace (including the line terminator) is stripped from
    every line via ``str.rstrip()``; blank lines are yielded as ``""``.

    Args:
        filepath: Path of the file to read (``str`` or ``Path``).

    Yields:
        Each line of the file without trailing whitespace.
    """
    # Single open with an explicit encoding so decoding does not depend on
    # the platform's locale default.
    with Path(filepath).open("r", encoding="utf-8") as f:
        for line in f:
            yield line.rstrip()


def yield_sentence_pair_with_index(filepath1, filepath2):
    """Yield aligned lines from two UTF-8 text files with a 1-based index.

    The files are read in lockstep; iteration stops as soon as the shorter
    file is exhausted. Trailing whitespace is stripped from both lines.

    Args:
        filepath1: Path of the first file.
        filepath2: Path of the second file.

    Yields:
        Tuples ``(line1, line2, index)`` where ``index`` starts at 1.
    """
    # Each file is opened once, with an explicit encoding, so decoding does
    # not depend on the platform's locale default.
    with Path(filepath1).open("r", encoding="utf-8") as f1, \
            Path(filepath2).open("r", encoding="utf-8") as f2:
        for index, (line1, line2) in enumerate(zip(f1, f2), start=1):
            yield (line1.rstrip(), line2.rstrip(), index)


def yield_sentence_pair(filepath1, filepath2):
    """Yield aligned line pairs from two UTF-8 text files.

    The files are read in lockstep; iteration stops as soon as the shorter
    file is exhausted. Trailing whitespace is stripped from both lines.

    Args:
        filepath1: Path of the first file.
        filepath2: Path of the second file.

    Yields:
        Tuples ``(line1, line2)``.
    """
    # Each file is opened once, with an explicit encoding, so decoding does
    # not depend on the platform's locale default.
    with Path(filepath1).open("r", encoding="utf-8") as f1, \
            Path(filepath2).open("r", encoding="utf-8") as f2:
        for line1, line2 in zip(f1, f2):
            yield line1.rstrip(), line2.rstrip()


def count_line(filepath):
    """Return the number of lines in the UTF-8 text file at *filepath*.

    A final line without a trailing newline still counts as one line; an
    empty file yields 0.

    Args:
        filepath: Path of the file to inspect (``str`` or ``Path``).

    Returns:
        The line count as an ``int``.
    """
    # Single open with an explicit encoding so decoding does not depend on
    # the platform's locale default.
    with Path(filepath).open("r", encoding="utf-8") as f:
        return sum(1 for _ in f)
Expand Down
167 changes: 0 additions & 167 deletions env.yml

This file was deleted.

52 changes: 26 additions & 26 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,26 +1,26 @@
bert_score==0.3.11
click==8.0.4
keybert==0.7.0
matplotlib==3.5.2
nltk==3.7
numpy==1.23.1
optuna==2.10.1
pandas==1.4.3
plotly==5.14.1
python_Levenshtein==0.21.0
pytorch_lightning==1.7.0
rouge==1.0.1
sacrebleu==2.2.0
sacremoses==0.0.53
scikit_learn==1.2.2
simalign==0.3
spacy==3.4.1
stanfordnlp==0.2.0
summarizer==0.0.7
torch==1.12.1
torchfile==0.1.0
tqdm==4.64.0
transformers==4.21.1
tupa==1.4.2
ucca==1.3.11
yattag==1.15.1
bert_score>=0.3.11
click>=8.0.4
keybert>=0.7.0
matplotlib>=3.5.2
nltk>=3.7
numpy>=1.23.1
optuna>=2.10.1
pandas>=1.4.3
plotly>=5.14.1
python_Levenshtein>=0.21.0
pytorch_lightning>=1.7.0
rouge>=1.0.1
sacrebleu>=2.2.0
sacremoses>=0.0.53
scikit_learn>=1.2.2
simalign>=0.3
spacy>=3.4.1
stanfordnlp>=0.2.0
summarizer>=0.0.7
torch
torchfile>=0.1.0
tqdm>=4.64.0
transformers>=4.21.1
tupa>=1.4.2
ucca>=1.3.11
yattag>=1.15.1