-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.py
More file actions
66 lines (57 loc) · 2.13 KB
/
main.py
File metadata and controls
66 lines (57 loc) · 2.13 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import pandas as pd
import argparse
from src.data.preprocessing import cluster_based_filter
from src.model.classification import classify
from src.model.parse_output import parse_single_file
from src.analysis.corpus_analysis import CorpusAnalysis
from src.analysis.predictions_analysis import PredictionsAnalysis
def main():
    """Command-line entry point for the O-Ster pipeline.

    The corpus CSV is always read and passed through
    ``cluster_based_filter`` first. Afterwards each stage runs only when
    its flag was supplied, in this fixed order: ``--classify``,
    ``--parse``, ``--corpus-analysis``, ``--prediction-analysis``.
    Classification receives the filtered frame, whereas the corpus
    analysis receives the unfiltered one.
    """
    # Each pipeline stage is an independent, opt-in boolean switch.
    stage_flags = (
        ("--classify", "Run classification on the filtered dataset"),
        ("--parse", "Parse the classification output file"),
        ("--corpus-analysis", "Run analysis on the original corpus"),
        ("--prediction-analysis", "Run analysis on the predictions"),
    )
    cli = argparse.ArgumentParser(description="Run O-Ster pipeline.")
    for flag, description in stage_flags:
        cli.add_argument(flag, action="store_true", help=description)
    options = cli.parse_args()

    # The dataset is loaded and filtered regardless of which flags are set.
    corpus = pd.read_csv(
        "O-Ster dataset/original_dataset/open_stereotypes_corpus.csv",
        sep=",",
    )
    filtered_corpus = cluster_based_filter(corpus)
    print("\n\n")

    if options.classify:
        print("Running classification...")
        # NOTE(review): the keyword `processed_fileame` is kept exactly as
        # the call site spells it -- confirm it matches classify()'s
        # parameter name.
        classify(
            df=filtered_corpus,
            pred_path="./predictions",
            processed_fileame="processed_dataset_1",
            prediction_filename="classifications_1",
            seed=42,
        )

    if options.parse:
        print("Parsing classification output...")
        parse_single_file(
            input_file="./predictions/classifications_1.csv",
            output_file="./parsed_output/parsed_output_1.csv",
        )

    if options.corpus_analysis:
        print("Running corpus analysis...")
        CorpusAnalysis(
            df=corpus, output_dir="./analysis_output"
        ).generate_full_report()
        print("Corpus analysis completed.")

    if options.prediction_analysis:
        print("Running prediction analysis...")
        PredictionsAnalysis(
            output_dir="./analysis_output"
        ).generate_full_report()
        print("Prediction analysis completed.")


# Run the pipeline only when this file is executed as a script.
if __name__ == "__main__":
    main()