Skip to content

Commit bea5e43

Browse files
committed
feat: added scripts for experiments
1 parent de779c0 commit bea5e43

File tree

3 files changed

+126
-0
lines changed

3 files changed

+126
-0
lines changed
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
import os
2+
import json
3+
import pandas as pd
4+
import argparse
5+
6+
def process_logs(base_dir, filter_keyword, output_csv):
    """Collect per-module metrics from ``logs.json`` files into one CSV.

    Walks ``base_dir`` recursively. Every directory whose walked path
    contains ``filter_keyword`` and which holds a ``logs.json`` file
    contributes one row per entry of ``log_data['configs'][metric_type]``,
    for each ``metric_type`` key found in ``log_data['metrics']``.

    Each row carries the columns ``dataset`` (directory name up to the first
    ``"_scoring"``), ``method``, ``metric`` and ``metric_value``, followed by
    one column per key of the entry's ``module_params``.

    Args:
        base_dir: Root directory to search.
        filter_keyword: Substring that must occur in the directory path as
            yielded by ``os.walk`` (note: this path starts with ``base_dir``,
            so a keyword occurring in ``base_dir`` itself matches everything).
        output_csv: Path of the CSV file to write.

    Raises:
        KeyError: If a log file lacks one of the keys read here.
        json.JSONDecodeError: If a ``logs.json`` file is not valid JSON.
    """
    rows = []

    for root, dirs, files in os.walk(base_dir):
        # os.walk yields entries in filesystem order; sorting the sub-directory
        # list in place makes the traversal (and the CSV row order) reproducible.
        dirs.sort()

        if filter_keyword not in root or 'logs.json' not in files:
            continue

        dataset_name = os.path.basename(root).split("_scoring")[0]
        log_path = os.path.join(root, 'logs.json')

        # Explicit encoding: do not depend on the platform's default codec.
        with open(log_path, 'r', encoding='utf-8') as f:
            log_data = json.load(f)

        # Only the keys of 'metrics' are needed; they index into 'configs'.
        for metric_type in log_data['metrics']:
            for config in log_data['configs'][metric_type]:
                row = {
                    'dataset': dataset_name,
                    'method': config['module_type'],
                    'metric': config['metric_name'],
                    'metric_value': config['metric_value'],
                }
                # Module hyper-parameters become additional columns; a param
                # named like one of the columns above overrides it.
                row.update(config['module_params'])
                rows.append(row)

    df = pd.DataFrame(rows)
    df.to_csv(output_csv, index=False)
    print(f"CSV file '{output_csv}' successfully created!")
39+
40+
if __name__ == "__main__":
    # Command-line entry point: three mandatory string options, forwarded
    # unchanged to process_logs().
    cli = argparse.ArgumentParser(description="Process logs and generate a CSV file.")

    option_help = (
        ('--base_dir', "Base directory where folders are located"),
        ('--filter_keyword', "Keyword to filter folders (e.g., 'multilabel')"),
        ('--output_csv', "Output CSV file name"),
    )
    for flag, help_text in option_help:
        cli.add_argument(flag, type=str, required=True, help=help_text)

    options = cli.parse_args()

    process_logs(
        base_dir=options.base_dir,
        filter_keyword=options.filter_keyword,
        output_csv=options.output_csv,
    )
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
#!/bin/bash
#
# Run `autointent` once for every dataset JSON file found in DATA_PATH.
#
# For each dataset the script:
#   1. calls the metric update script (CONFIG_SCRIPT_PATH) with METRIC and the
#      multilabel flag,
#   2. deletes the local runs/ directory,
#   3. runs `autointent`, logging to "$LOG_PATH/<dataset>_<metric>".
#
# The script stops with exit status 1 on the first failing step, and also when
# DATA_PATH contains no *.json file at all.

DATA_PATH="experiments/intent_description"
LOG_PATH="experiments/intent_description/multilabel"
METRIC="scoring_hit_rate"
USE_MULTILABEL=true
CONFIG_SCRIPT_PATH="./update_metric.sh"

# The flag handed to the metric update script is the same for every dataset,
# so it is derived once instead of on every loop iteration.
if [ "$USE_MULTILABEL" = true ]; then
    MULTILABEL_ARG="true"
else
    MULTILABEL_ARG="false"
fi

PROCESSED=0

for FILE in "$DATA_PATH"/*.json; do
    # Without nullglob an unmatched pattern is left as the literal string
    # ".../*.json"; skip it instead of treating it as a dataset.
    [ -e "$FILE" ] || continue

    FILENAME=$(basename "$FILE" .json)
    # Dataset name = file name with everything from the first "_fix" removed.
    DATASET_NAME="${FILENAME%%_fix*}"

    # Update the metric in the configuration file
    echo "Updating metric for dataset: $DATASET_NAME"
    if ! "$CONFIG_SCRIPT_PATH" "$METRIC" "$MULTILABEL_ARG"; then
        echo "Error updating metric for $DATASET_NAME. Exiting."
        exit 1
    fi

    # NOTE(review): presumably clears output left by a previous run - confirm.
    rm -rf runs/

    echo "Processing dataset: $DATASET_NAME"
    if ! autointent data.train_path="$FILE" \
            logs.dirpath="$LOG_PATH/${DATASET_NAME}_${METRIC}" \
            seed=42 \
            vector_index.device=cuda \
            hydra.job_logging.root.level=INFO \
            data.force_multilabel="$USE_MULTILABEL"; then
        echo "Error encountered while processing $FILE. Exiting."
        exit 1
    fi

    echo "Successfully processed $FILE"
    PROCESSED=$((PROCESSED + 1))
done

# Do not report success when the loop never had anything to work on.
if [ "$PROCESSED" -eq 0 ]; then
    echo "No dataset files (*.json) found in $DATA_PATH. Exiting."
    exit 1
fi

echo "All datasets processed successfully."
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
#!/bin/bash
#
# Set the `metric` field of every node with node_type == "scoring" in one of
# the default autointent configuration files.
#
# Usage: update_metric.sh <new_metric> <multilabel:true|false>
#
# The configuration file is copied to "<config>.bak" and then edited in place
# with yq (`yq e ... -i`). Exits with status 1 on bad arguments, when the
# backup cannot be written, or when yq fails.

# Check if the required arguments are provided
if [ -z "$1" ] || [ -z "$2" ]; then
    echo "Usage: $0 <new_metric> <multilabel:true|false>"
    exit 1
fi

NEW_METRIC="$1"
MULTILABEL="$2"

# Determine the correct configuration file based on the multilabel argument.
# NOTE: both paths are relative to the current working directory.
case "$MULTILABEL" in
    true)
        CONFIG_PATH="../../autointent/datafiles/default-multilabel-config.yaml"
        ;;
    false)
        CONFIG_PATH="../../autointent/datafiles/default-multiclass-config.yaml"
        ;;
    *)
        echo "Invalid value for <multilabel>. Use 'true' or 'false'."
        exit 1
        ;;
esac

# Backup the configuration file; never edit in place without a backup.
# NOTE(review): every invocation overwrites the previous .bak - confirm that
# losing the pristine original after the first run is acceptable.
if ! cp "$CONFIG_PATH" "${CONFIG_PATH}.bak"; then
    echo "Failed to back up $CONFIG_PATH"
    exit 1
fi

# Update the metric value where node_type=scoring.
# The new value is handed to yq through the environment and read with
# strenv(), so quotes or backslashes in it cannot alter the yq expression.
if NEW_METRIC="$NEW_METRIC" yq e \
        '(.nodes[] | select(.node_type == "scoring") | .metric) = strenv(NEW_METRIC)' \
        -i "$CONFIG_PATH"; then
    echo "Metric value successfully updated to '$NEW_METRIC' in $CONFIG_PATH where node_type=scoring"
else
    echo "Failed to update the metric value in $CONFIG_PATH"
    exit 1
fi

0 commit comments

Comments
 (0)