Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 46 additions & 0 deletions src/cfg/run_quality_evaluation_cfg.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
---
# Hydra configuration for the benchmark quality-evaluation run
# (src/cfg/run_quality_evaluation_cfg.yaml).

prompt_cfg:
  # System message used when computing quality metrics from existing scores.
  sys_msg: Compute benchmark quality metrics from existing scores.

quality_eval_cfg:
  # Root directory holding sampled score files; per-benchmark scores live
  # under scores_subdir inside each benchmark directory.
  scores_root_dir: "/projects/DeepLesion/projects/automated_capability_evaluation/data/scores_sample"
  scores_subdir: "scores"
  # Previously scored datasets to compare against (used as priors).
  prior_datasets:
    - "/projects/DeepLesion/projects/automated_capability_evaluation/data/scores_sample/math-500"

  # NOTE(review): "taks" looks like a typo for "tasks" — confirm this path
  # exists as written before renaming; left byte-identical here.
  capabilities_dir: "/projects/aieng/public/ace/artifacts/negin_ace/taks/math/"

  # Local directory of real data; null means the data is loaded via
  # real_dataloader_config below instead.
  real_data_dir: null

  real_dataloader_config:
    type: "huggingface"
    dataset_name: "HuggingFaceH4/MATH-500"
    split: "test"
    subset: null
    text_field: "problem"

  # embedding_backend: "openai" uses OpenAI embeddings, "huggingface" uses
  # sentence-transformers.
  embedding_backend: "openai"
  embedding_model: "text-embedding-3-large"
  # embedding_dimensions is ignored for HuggingFace models (uses the model's
  # native dimension).
  embedding_dimensions: 3072

  # Diversity metrics to compute over the embedded score sets.
  diversity_metrics:
    - "pad"
    - "mmd"
    - "mdm"

  # Classifier used for the PAD (proxy a-distance) metric.
  pad_classifier: "LogisticRegression"  # Options: "LogisticRegression", "RandomForest", "MLP"

  # Kernel settings for the MMD metric.
  mmd_kernel: "polynomial"  # Options: "polynomial", "rbf", "laplacian", "linear", "sigmoid"
  mmd_degree: 3

  # Clustering settings for the MDM metric.
  mdm_n_clusters: 5
  mdm_metric: "euclidean"

exp_cfg:
  exp_id: "quality_evaluation"

# Hydra defaults list; _self_ keeps this file's own values in the composition.
defaults:
  - _self_

Loading
Loading