From 88ba384647c91bfd14081058bce89fee4d59e930 Mon Sep 17 00:00:00 2001
From: flashssd
Date: Wed, 15 Jan 2025 01:23:30 -0800
Subject: [PATCH] feat: accuracy by poly-type

---
 scripts/acc_by_type.sh     | 31 +++++++++++++++++++++++++++++++
 scripts/run_experiments.py |  2 ++
 src/experiment.py          | 11 +++++++++--
 3 files changed, 42 insertions(+), 2 deletions(-)
 create mode 100644 scripts/acc_by_type.sh

diff --git a/scripts/acc_by_type.sh b/scripts/acc_by_type.sh
new file mode 100644
index 0000000..3988147
--- /dev/null
+++ b/scripts/acc_by_type.sh
@@ -0,0 +1,31 @@
+#!/usr/bin/env bash
+# Run scripts/run_experiments.py once per (poly_type, model, pure) combination.
+# Paths are relative, so invoke this from the repository root.
+
+source ./env.sh
+
+# Experiment grid
+poly_types=("Ad-hoc" "Parametric" "Monomorphic" "Polymorphic")
+models=("api")
+pure_values=("true" "false")
+
+for poly_type in "${poly_types[@]}"; do
+    for model in "${models[@]}"; do
+        for pure in "${pure_values[@]}"; do
+            # Pure runs read the ".pure" variant of the benchmark file.
+            if [ "$pure" = "true" ]; then
+                input_file="Benchmark-F.pure.json"
+            else
+                input_file="Benchmark-F.json"
+            fi
+
+            # NOTE(review): confirm the CLI parser converts the strings
+            # "true"/"false" to booleans rather than passing them through.
+            python scripts/run_experiments.py \
+                --poly_type="$poly_type" \
+                --option="$model" \
+                --pure="$pure" \
+                --input_file="$input_file"
+        done
+    done
+done
diff --git a/scripts/run_experiments.py b/scripts/run_experiments.py
index 19bdb1e..8c1ea19 100644
--- a/scripts/run_experiments.py
+++ b/scripts/run_experiments.py
@@ -17,6 +17,7 @@ def main(
     temperature: float = TEMPERATURE,
     repeat: int = 1,
     pure: bool = False,
+    poly_type: str = "all",
 ):
 
     port: int = 11434
@@ -51,6 +52,7 @@ def main(
             port=port,
             log_file=log_file,
             pure=pure,
+            poly_type=poly_type,
         )
 
 
diff --git a/src/experiment.py b/src/experiment.py
index b653f4d..f37b71d 100644
--- a/src/experiment.py
+++ b/src/experiment.py
@@ -138,6 +138,7 @@ def main(
     temperature: float = TEMPERATURE,
     port: int = 11434,
     pure: bool = False,
+    poly_type: str = "all",
 ):
     """
     Run an experiment using various AI models to generate and evaluate type signatures.
@@ -178,7 +179,7 @@ def main(

     if output_file is None:
         os.makedirs("result", exist_ok=True)
-        output_file = f"result/{model}.txt"
+        output_file = f"result/{model}-{poly_type}.txt"
 
     if log_file is None:
         log_file = "evaluation_log.jsonl"
@@ -205,7 +206,13 @@ def main(
         generate = get_ollama_model(client, model, seed, temperature, pure)
 
     with open(input_file, "r") as fp:
-        tasks = [from_dict(data_class=BenchmarkTask, data=d) for d in json.load(fp)]
+        records = json.load(fp)
+
+    # "all" keeps every benchmark entry; any other value selects only the
+    # entries whose "poly_type" field matches it exactly.
+    if poly_type != "all":
+        records = [d for d in records if d.get("poly_type") == poly_type]
+    tasks = [from_dict(data_class=BenchmarkTask, data=d) for d in records]
 
     prompts = lmap(lambda x: get_prompt(x, full_type), tasks)
     responses = lmap(generate, tqdm(prompts, desc=model))