Commit 9d865c6

update repository
1 parent bea7f6e commit 9d865c6

27 files changed (+2420 / -249 lines)

.gitignore

Lines changed: 2 additions & 3 deletions
@@ -5,8 +5,7 @@ __pycache__/
 ssh_info.txt
 data/full/*
 aminer_templates/templates.ipynb
+aminer_templates/logs_to_df.ipynb
 output/*
 output-full/*
-old/*
-!output/README.md
-!output-full/README.md
+old/*

README.md

Lines changed: 9 additions & 1 deletion
@@ -1,5 +1,5 @@
 # LLMs_for_log_parsing
-This is the replication repository for the paper **[SoK: LLM-based Log Parsing](https://arxiv.org/abs/2504.04877)** (arXiv). For this systemization of knowledge (SoK), 30 papers, concerning LLM-based log parsing, were reviewed. The extracted features of each work can be found in the excel sheet [categories.xlsx](./documentation/categories.xlsx). The general process of LLM-based log parsing, derived from the reviewed papers, can be depicted as follows:
+This is the replication repository for **https://arxiv.org/abs/2504.04877** (arXiv). 29 papers concerning LLM-based log parsing were reviewed; seven of them were used for the benchmark. The systematic overview can be found in the Excel sheet [categories_clean.xlsx](./documentation/categories.xlsx).
 
 <img src="./documentation/LLM-based log parsing.png" width="700">
 
@@ -70,4 +70,12 @@ To evaluate everything and produce the result files and the plots you can also r
 
 ```
 python3 run_evaluation.py
+```
+
+## Other
+
+To find the right hyperparameters for the Audit dataset, we simply run a grid search over a selection of parameters. Since this is the baseline, we let it run over the entire dataset to get the maximum possible performance:
+
+```
+python3 parser_run-no-LLM-GridSearch.py
 ```
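
After the grid search has finished, the per-parser result tables can be used to pick the best setting. A minimal sketch, assuming the `output-GridSearch/` folder and the `GA` (grouping accuracy) and `params` columns produced by the evaluation; folder, file, and column names are taken from the `parser_run-no-LLM-GridSearch.py` script added in this commit:

```
# Sketch: pick the best Drain configuration from the grid-search results.
# Assumes <parser>_gs_results.csv as written by parser_run-no-LLM-GridSearch.py.
import pandas as pd

results = pd.read_csv("output-GridSearch/Drain_gs_results.csv")
best = results.sort_values(by="GA", ascending=False).iloc[0]
print("Best parameters:", best["params"])
print("GA:", best["GA"])
```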

audit_templates/Audit_2k.log

Lines changed: 2000 additions & 0 deletions
Large diffs are not rendered by default.

documentation/categories.xlsx

-20 KB
Binary file not shown.
15.3 KB
Binary file not shown.

documentation/xlsx_to_latex.ipynb

Lines changed: 38 additions & 18 deletions
@@ -2,7 +2,7 @@
 "cells": [
 {
 "cell_type": "code",
-"execution_count": null,
+"execution_count": 1,
 "metadata": {},
 "outputs": [],
 "source": [
@@ -13,34 +13,54 @@
 },
 {
 "cell_type": "code",
-"execution_count": 8,
+"execution_count": 2,
 "metadata": {},
 "outputs": [],
 "source": [
-"df = pd.read_excel(\"categories.xlsx\", sheet_name=\"table_final\")"
+"df = pd.read_excel(\"categories_clean.xlsx\", sheet_name=\"table_final\")"
 ]
 },
 {
 "cell_type": "code",
-"execution_count": 50,
+"execution_count": 3,
 "metadata": {},
-"outputs": [],
+"outputs": [
+{
+"data": {
+"text/plain": [
+"{'ED', 'FGA', 'FTA', 'GA', 'PA', 'PTA', 'RTA', 'other'}"
+]
+},
+"metadata": {},
+"output_type": "display_data"
+},
+{
+"data": {
+"text/plain": [
+"0.3275862068965517"
+]
+},
+"execution_count": 3,
+"metadata": {},
+"output_type": "execute_result"
+}
+],
 "source": [
-"# m_list = [m.replace(\" \", \"\").split(\",\") for m in df[\"Metrics\"].fillna(\"nan\").iloc[2:]]\n",
-"# m_sets = [set(m) for m in m_list]\n",
-"# m_full_list = sum(m_list, [])\n",
-"# m_full_list.remove(\"nan\")\n",
-"# m_full_set = set(m_full_list)\n",
-"# display(m_full_set)\n",
+"m_list = [m.replace(\" \", \"\").split(\",\") for m in df[\"Metrics\"].fillna(\"nan\").iloc[2:]]\n",
+"m_sets = [set(m) for m in m_list]\n",
+"m_full_list = sum(m_list, [])\n",
+"m_full_list.remove(\"nan\")\n",
+"m_full_set = set(m_full_list)\n",
+"display(m_full_set)\n",
 "\n",
-"# def jaccard_index(set1, set2):\n",
-"# # print(set1, set2)\n",
-"# intersection = len(set1.intersection(set2))\n",
-"# union = len(set1.union(set2))\n",
-"# return intersection / union if union != 0 else 0\n",
+"def jaccard_index(set1, set2):\n",
+" # print(set1, set2)\n",
+" intersection = len(set1.intersection(set2))\n",
+" union = len(set1.union(set2))\n",
+" return intersection / union if union != 0 else 0\n",
 "\n",
-"# mean_jaccard = sum([jaccard_index(s, m_full_set) for s in m_sets])/len(m_sets)\n",
-"# mean_jaccard"
+"mean_jaccard = sum([jaccard_index(s, m_full_set) for s in m_sets])/len(m_sets)\n",
+"mean_jaccard"
 ]
 },
 {
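
The uncommented cell above computes, for each reviewed paper, the Jaccard index between the set of metrics that paper reports and the full set of metrics observed across all papers, and then averages it (0.3276 in the cell output). A small self-contained sketch of the same computation on toy data; the metric abbreviations mirror the cell output, and the two example sets are made up:

```
def jaccard_index(set1, set2):
    intersection = len(set1 & set2)
    union = len(set1 | set2)
    return intersection / union if union != 0 else 0

# toy data: metrics reported by two hypothetical papers
m_sets = [{"GA", "PA"}, {"GA", "FGA", "FTA", "ED"}]
m_full_set = set().union(*m_sets)  # union of all reported metrics
mean_jaccard = sum(jaccard_index(s, m_full_set) for s in m_sets) / len(m_sets)
print(mean_jaccard)  # (2/5 + 4/5) / 2 = 0.6 for this toy example
```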

output-GridSearch.zip

9.86 MB
Binary file not shown.

output.zip

56.3 KB
Binary file not shown.

parser_run-no-LLM-GridSearch.py

Lines changed: 141 additions & 0 deletions
@@ -0,0 +1,141 @@
+from utils.parser_utils import *
+
+from run_parser import LILAC, LogBatcher, DivLog, LogPrompt, SelfLog, OpenLogParser, LLM_TD
+from run_parser import Drain, ULP, Brain, SPELL, AEL
+
+from utils.evaluate import evaluate_metrics
+from itertools import product
+
+def evaluate(dataset, parser, out_dir, corrected_LogHub=True,):
+    limit = 2000
+    input_dir = params["in_dir"]
+    print(f"--- {dataset} - {parser}", flush=True)
+    corrected_str = "_corrected" if corrected_LogHub else ""
+    groundtruth_path = os.path.join(input_dir, dataset, f"{dataset}_{params["dataset_type"]}.log_structured{corrected_str}.csv")
+    result_path = os.path.join(out_dir, f"{dataset}_{params["dataset_type"]}.log_structured.csv")
+    #result_path = os.path.join(OUTPUT_FOLDER, parser, f"{dataset}_{params["dataset_type"]}.log_structured.csv")
+    if not os.path.exists(result_path):
+        print("Path doesn't exist:", result_path)
+        raise FileNotFoundError
+    df_result = evaluate_metrics(dataset, groundtruth_path, result_path, limit=limit)
+    return df_result
+
+parsers = {
+    # baseline
+    "Drain": Drain,
+    # "ULP": ULP,
+    "Brain": Brain,
+    "SPELL": SPELL,
+    "AEL": AEL,
+    # unsupervised parsers
+    # "OpenLogParser": OpenLogParser,
+    # # "LogPrompt": LogPrompt,
+    # "LLM_TD": LLM_TD,
+    # "LogBatcher": LogBatcher,
+    # # supervised parsers
+    # "SelfLog": SelfLog,
+    # "LILAC-2": LILAC,
+    # "LILAC-4": LILAC,
+    # "DivLog-2": DivLog,
+    # "DivLog-4": DivLog,
+}
+
+multiple_runs_list = list(parsers.keys())
+
+datasets = [
+    # 'Android',
+    # 'Apache',
+    # 'BGL',
+    # 'HDFS',
+    # 'HPC',
+    # 'Hadoop',
+    # 'HealthApp',
+    # 'Linux',
+    # 'Mac',
+    # 'OpenSSH',
+    # 'OpenStack',
+    # 'Proxifier',
+    # 'Spark',
+    # 'Thunderbird',
+    # 'Windows',
+    # 'Zookeeper',
+    "Audit" # custom
+]
+
+model = "no-LLM"
+# model = "gpt-3.5-turbo" # openai api
+# model="deepseek-ai/DeepSeek-R1" # togetherai api
+# model = "deepseek-reasoner" # deepseek api
+# model = "codellama:7b-instruct" # ollama local api
+
+dataset_type = "2k"
+
+total_runs = 1
+
+gs_params = {
+    "Drain": {
+        "depth": [4,5,6,7,8,9,10,11,12,13,14],
+        "st": [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9],
+    },
+    "Brain": {
+        "threshold": [2,3,4,5,6,7,8,9,10],
+    },
+    "AEL": {
+        "minEventCount": [1,2,3,4,5,6,7,8,9,10,11,12],
+        "merge_percent": [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
+    },
+    "SPELL": {
+        "tau": [0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 0.99],
+    },
+}
+
+params = {
+    #"in_dir": DATA_FOLDER + "2k/",
+    "in_dir": DATA_FOLDER + f"{dataset_type}/",
+    "settings": settings,
+    "dataset_type": dataset_type,
+    "model": model,
+    "log_format": True,
+    "corrected_LH": True ### ATTENTION !!!!!! ###
+}
+
+if __name__ == "__main__":
+    output_folder = OUTPUT_FOLDER[:-1] + "-GridSearch/"
+    for dataset in datasets: # per dataset
+        params["dataset"] = dataset
+        for parser_name, parser in parsers.items(): # per parser
+            # Generate grid search combinations for the current parser
+            param_grid = gs_params[parser_name]
+            param_combinations = list(product(*param_grid.values()))
+            param_names = list(param_grid.keys())
+
+            gs_results = []
+
+            for i, param_values in enumerate(param_combinations): # per parameter combination
+                param_dict = dict(zip(param_names, param_values))
+                if parser_name in ["Drain"]:
+                    params["settings"]["Audit"].update(param_dict)
+                else:
+                    params["settings"][f"{parser_name}_settings"]["Audit"].update(param_dict)
+
+                run_dir = f"run{"_".join([f"{k}_{v}" for k, v in param_dict.items()])}"
+                print(f"Running {parser_name} on {dataset}")
+                out_dir = os.path.join(output_folder, model, parser_name, run_dir)
+                params["out_dir"] = out_dir
+                if not os.path.exists(out_dir):
+                    os.makedirs(out_dir)
+
+                runtime, invoc_time = parser.parse(**params)
+
+                df_result = evaluate(dataset, parser_name, out_dir, corrected_LogHub=params["corrected_LH"])
+                df_result["params"] = [param_dict]
+
+                print(df_result)
+
+                gs_results.append(df_result)
+
+            gs_results_all = pd.concat(gs_results, ignore_index=True)
+            # gs_results_all = gs_results_all.sort_values(by=["GA"], ascending=False)
+            gs_results_all.to_csv(os.path.join(output_folder, parser_name + "_gs_results.csv"), index=False)
+            print("Grid search results saved to:", os.path.join(output_folder, parser_name + "_gs_results.csv"))
+

plots/all_models_aggregated.pdf

0 Bytes
Binary file not shown.
