Commit 9d865c6

update repository
1 parent bea7f6e commit 9d865c6

27 files changed (+2420 / -249 lines)

.gitignore

Lines changed: 2 additions & 3 deletions
@@ -5,8 +5,7 @@ __pycache__/
 ssh_info.txt
 data/full/*
 aminer_templates/templates.ipynb
+aminer_templates/logs_to_df.ipynb
 output/*
 output-full/*
-old/*
-!output/README.md
-!output-full/README.md
+old/*

README.md

Lines changed: 9 additions & 1 deletion
@@ -1,5 +1,5 @@
 # LLMs_for_log_parsing
-This is the replication repository for the paper **[SoK: LLM-based Log Parsing](https://arxiv.org/abs/2504.04877)** (arXiv). For this systemization of knowledge (SoK), 30 papers, concerning LLM-based log parsing, were reviewed. The extracted features of each work can be found in the excel sheet [categories.xlsx](./documentation/categories.xlsx). The general process of LLM-based log parsing, derived from the reviewed papers, can be depicted as follows:
+This is the replication repository for **https://arxiv.org/abs/2504.04877** (arXiv). 29 papers concerning LLM-based log parsing were reviewed; seven of them were used for the benchmark. The systematic overview can be found in the Excel sheet [categories_clean.xlsx](./documentation/categories.xlsx).
 
 <img src="./documentation/LLM-based log parsing.png" width="700">
 
@@ -70,4 +70,12 @@ To evaluate everything and produce the result files and the plots you can also r
 
 ```
 python3 run_evaluation.py
+```
+
+## Other
+
+To find the right hyperparameters for the Audit dataset, we simply run a grid search over a selection of parameters. Since this is the baseline, we let it run over the entire dataset to get the maximum possible performance:
+
+```
+python3 parser_run-no-LLM-GridSearch.py
 ```
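
After the grid search has finished, the per-parser result tables can be used to pick the best setting. A minimal sketch, assuming the `output-GridSearch/` folder and the `GA` (grouping accuracy) and `params` columns produced by the evaluation; folder, file, and column names are taken from the `parser_run-no-LLM-GridSearch.py` script added in this commit:

```
# Sketch: pick the best Drain configuration from the grid-search results.
# Assumes <parser>_gs_results.csv as written by parser_run-no-LLM-GridSearch.py.
import pandas as pd

results = pd.read_csv("output-GridSearch/Drain_gs_results.csv")
best = results.sort_values(by="GA", ascending=False).iloc[0]
print("Best parameters:", best["params"])
print("GA:", best["GA"])
```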

audit_templates/Audit_2k.log

Lines changed: 2000 additions & 0 deletions
Large diffs are not rendered by default.

documentation/categories.xlsx

-20 KB
Binary file not shown.
15.3 KB
Binary file not shown.

documentation/xlsx_to_latex.ipynb

Lines changed: 38 additions & 18 deletions
@@ -2,7 +2,7 @@
 "cells": [
 {
 "cell_type": "code",
-"execution_count": null,
+"execution_count": 1,
 "metadata": {},
 "outputs": [],
 "source": [
@@ -13,34 +13,54 @@
 },
 {
 "cell_type": "code",
-"execution_count": 8,
+"execution_count": 2,
 "metadata": {},
 "outputs": [],
 "source": [
-"df = pd.read_excel(\"categories.xlsx\", sheet_name=\"table_final\")"
+"df = pd.read_excel(\"categories_clean.xlsx\", sheet_name=\"table_final\")"
 ]
 },
 {
 "cell_type": "code",
-"execution_count": 50,
+"execution_count": 3,
 "metadata": {},
-"outputs": [],
+"outputs": [
+{
+"data": {
+"text/plain": [
+"{'ED', 'FGA', 'FTA', 'GA', 'PA', 'PTA', 'RTA', 'other'}"
+]
+},
+"metadata": {},
+"output_type": "display_data"
+},
+{
+"data": {
+"text/plain": [
+"0.3275862068965517"
+]
+},
+"execution_count": 3,
+"metadata": {},
+"output_type": "execute_result"
+}
+],
 "source": [
-"# m_list = [m.replace(\" \", \"\").split(\",\") for m in df[\"Metrics\"].fillna(\"nan\").iloc[2:]]\n",
-"# m_sets = [set(m) for m in m_list]\n",
-"# m_full_list = sum(m_list, [])\n",
-"# m_full_list.remove(\"nan\")\n",
-"# m_full_set = set(m_full_list)\n",
-"# display(m_full_set)\n",
+"m_list = [m.replace(\" \", \"\").split(\",\") for m in df[\"Metrics\"].fillna(\"nan\").iloc[2:]]\n",
+"m_sets = [set(m) for m in m_list]\n",
+"m_full_list = sum(m_list, [])\n",
+"m_full_list.remove(\"nan\")\n",
+"m_full_set = set(m_full_list)\n",
+"display(m_full_set)\n",
 "\n",
-"# def jaccard_index(set1, set2):\n",
-"# # print(set1, set2)\n",
-"# intersection = len(set1.intersection(set2))\n",
-"# union = len(set1.union(set2))\n",
-"# return intersection / union if union != 0 else 0\n",
+"def jaccard_index(set1, set2):\n",
+" # print(set1, set2)\n",
+" intersection = len(set1.intersection(set2))\n",
+" union = len(set1.union(set2))\n",
+" return intersection / union if union != 0 else 0\n",
 "\n",
-"# mean_jaccard = sum([jaccard_index(s, m_full_set) for s in m_sets])/len(m_sets)\n",
-"# mean_jaccard"
+"mean_jaccard = sum([jaccard_index(s, m_full_set) for s in m_sets])/len(m_sets)\n",
+"mean_jaccard"
 ]
 },
 {
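
The uncommented cell above computes, for each reviewed paper, the Jaccard index between the set of metrics that paper reports and the full set of metrics observed across all papers, and then averages it (0.3276 in the cell output). A small self-contained sketch of the same computation on toy data; the metric abbreviations mirror the cell output, and the two example sets are made up:

```
def jaccard_index(set1, set2):
    intersection = len(set1 & set2)
    union = len(set1 | set2)
    return intersection / union if union != 0 else 0

# toy data: metrics reported by two hypothetical papers
m_sets = [{"GA", "PA"}, {"GA", "FGA", "FTA", "ED"}]
m_full_set = set().union(*m_sets)  # union of all reported metrics
mean_jaccard = sum(jaccard_index(s, m_full_set) for s in m_sets) / len(m_sets)
print(mean_jaccard)  # (2/5 + 4/5) / 2 = 0.6 for this toy example
```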

output-GridSearch.zip

9.86 MB
Binary file not shown.

output.zip

56.3 KB
Binary file not shown.

parser_run-no-LLM-GridSearch.py

Lines changed: 141 additions & 0 deletions
@@ -0,0 +1,141 @@
+from utils.parser_utils import *
+
+from run_parser import LILAC, LogBatcher, DivLog, LogPrompt, SelfLog, OpenLogParser, LLM_TD
+from run_parser import Drain, ULP, Brain, SPELL, AEL
+
+from utils.evaluate import evaluate_metrics
+from itertools import product
+
+def evaluate(dataset, parser, out_dir, corrected_LogHub=True,):
+    limit = 2000
+    input_dir = params["in_dir"]
+    print(f"--- {dataset} - {parser}", flush=True)
+    corrected_str = "_corrected" if corrected_LogHub else ""
+    groundtruth_path = os.path.join(input_dir, dataset, f"{dataset}_{params["dataset_type"]}.log_structured{corrected_str}.csv")
+    result_path = os.path.join(out_dir, f"{dataset}_{params["dataset_type"]}.log_structured.csv")
+    #result_path = os.path.join(OUTPUT_FOLDER, parser, f"{dataset}_{params["dataset_type"]}.log_structured.csv")
+    if not os.path.exists(result_path):
+        print("Path doesn't exist:", result_path)
+        raise FileNotFoundError
+    df_result = evaluate_metrics(dataset, groundtruth_path, result_path, limit=limit)
+    return df_result
+
+parsers = {
+    # baseline
+    "Drain": Drain,
+    # "ULP": ULP,
+    "Brain": Brain,
+    "SPELL": SPELL,
+    "AEL": AEL,
+    # unsupervised parsers
+    # "OpenLogParser": OpenLogParser,
+    # # "LogPrompt": LogPrompt,
+    # "LLM_TD": LLM_TD,
+    # "LogBatcher": LogBatcher,
+    # # supervised parsers
+    # "SelfLog": SelfLog,
+    # "LILAC-2": LILAC,
+    # "LILAC-4": LILAC,
+    # "DivLog-2": DivLog,
+    # "DivLog-4": DivLog,
+}
+
+multiple_runs_list = list(parsers.keys())
+
+datasets = [
+    # 'Android',
+    # 'Apache',
+    # 'BGL',
+    # 'HDFS',
+    # 'HPC',
+    # 'Hadoop',
+    # 'HealthApp',
+    # 'Linux',
+    # 'Mac',
+    # 'OpenSSH',
+    # 'OpenStack',
+    # 'Proxifier',
+    # 'Spark',
+    # 'Thunderbird',
+    # 'Windows',
+    # 'Zookeeper',
+    "Audit" # custom
+]
+
+model = "no-LLM"
+# model = "gpt-3.5-turbo" # openai api
+# model="deepseek-ai/DeepSeek-R1" # togetherai api
+# model = "deepseek-reasoner" # deepseek api
+# model = "codellama:7b-instruct" # ollama local api
+
+dataset_type = "2k"
+
+total_runs = 1
+
+gs_params = {
+    "Drain": {
+        "depth": [4,5,6,7,8,9,10,11,12,13,14],
+        "st": [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9],
+    },
+    "Brain": {
+        "threshold": [2,3,4,5,6,7,8,9,10],
+    },
+    "AEL": {
+        "minEventCount": [1,2,3,4,5,6,7,8,9,10,11,12],
+        "merge_percent": [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
+    },
+    "SPELL": {
+        "tau": [0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 0.99],
+    },
+}
+
+params = {
+    #"in_dir": DATA_FOLDER + "2k/",
+    "in_dir": DATA_FOLDER + f"{dataset_type}/",
+    "settings": settings,
+    "dataset_type": dataset_type,
+    "model": model,
+    "log_format": True,
+    "corrected_LH": True ### ATTENTION !!!!!! ###
+}
+
+if __name__ == "__main__":
+    output_folder = OUTPUT_FOLDER[:-1] + "-GridSearch/"
+    for dataset in datasets: # per dataset
+        params["dataset"] = dataset
+        for parser_name, parser in parsers.items(): # per parser
+            # Generate grid search combinations for the current parser
+            param_grid = gs_params[parser_name]
+            param_combinations = list(product(*param_grid.values()))
+            param_names = list(param_grid.keys())
+
+            gs_results = []
+
+            for i, param_values in enumerate(param_combinations): # per parameter combination
+                param_dict = dict(zip(param_names, param_values))
+                if parser_name in ["Drain"]:
+                    params["settings"]["Audit"].update(param_dict)
+                else:
+                    params["settings"][f"{parser_name}_settings"]["Audit"].update(param_dict)
+
+                run_dir = f"run{"_".join([f"{k}_{v}" for k, v in param_dict.items()])}"
+                print(f"Running {parser_name} on {dataset}")
+                out_dir = os.path.join(output_folder, model, parser_name, run_dir)
+                params["out_dir"] = out_dir
+                if not os.path.exists(out_dir):
+                    os.makedirs(out_dir)
+
+                runtime, invoc_time = parser.parse(**params)
+
+                df_result = evaluate(dataset, parser_name, out_dir, corrected_LogHub=params["corrected_LH"])
+                df_result["params"] = [param_dict]
+
+                print(df_result)
+
+                gs_results.append(df_result)
+
+            gs_results_all = pd.concat(gs_results, ignore_index=True)
+            # gs_results_all = gs_results_all.sort_values(by=["GA"], ascending=False)
+            gs_results_all.to_csv(os.path.join(output_folder, parser_name + "_gs_results.csv"), index=False)
+            print("Grid search results saved to:", os.path.join(output_folder, parser_name + "_gs_results.csv"))
+

plots/all_models_aggregated.pdf

0 Bytes
Binary file not shown.
