diff --git a/.gitignore b/.gitignore index ea138653..14411ca4 100644 --- a/.gitignore +++ b/.gitignore @@ -196,3 +196,6 @@ uv.lock # AI docs CLAUDE.md +/mylitqaruns +/perplitqa +/MYSTORM diff --git a/MYSTORM.dvc b/MYSTORM.dvc new file mode 100644 index 00000000..083d5927 --- /dev/null +++ b/MYSTORM.dvc @@ -0,0 +1,6 @@ +outs: +- md5: 3b9f3dfce362059f815d0f797a1d4ed1.dir + size: 15575711 + nfiles: 2 + hash: md5 + path: MYSTORM diff --git a/astabench/evals/labbench/litqa2/task.py b/astabench/evals/labbench/litqa2/task.py index e97089f3..e8ad5a5a 100644 --- a/astabench/evals/labbench/litqa2/task.py +++ b/astabench/evals/labbench/litqa2/task.py @@ -7,9 +7,11 @@ import random import string from typing import Literal, NamedTuple, cast +import os import pandas as pd from datasets import Dataset, DatasetDict, IterableDatasetDict, load_dataset +from huggingface_hub import hf_hub_download from inspect_ai import Task, task, task_with from inspect_ai.dataset import MemoryDataset, Sample from inspect_ai.scorer import ( @@ -201,11 +203,17 @@ def load_litqa2(split: Literal["dev", "test", "all"] = "all") -> list[dict]: if split == "all": return list(map(dict, dataset["train"])) - litqa_mappings = load_dataset( + p = hf_hub_download( ASTA_BENCH_DATASET_REPO, - data_files="tasks/labbench/litqa2_mapping.json", + "tasks/labbench/litqa2_mapping.json", + token=os.getenv("HF_TOKEN"), + repo_type="dataset", revision=ASTA_BENCH_DATASET_REVISION, ) + litqa_mappings = load_dataset( + "json", + data_files=p, + ) df = pd.merge( dataset["train"].to_pandas(), @@ -282,6 +290,8 @@ def litqa2(*litqa_args, **litqa_kwargs): textin/textout format instead of Inspect's `state.choices`. See litqa2() for more task details.""" + litqa_kwargs["with_search_tools"] = False + litqa_kwargs["split"] = "test" base_task = litqa2_inspect(*litqa_args, **litqa_kwargs) dataset = [] diff --git a/dvc.lock b/dvc.lock index 4680d3dd..ef0b7f8d 100644 --- a/dvc.lock +++ b/dvc.lock @@ -1563,8 +1563,8 @@ stages: deps: - path: dev_dvc_logs/scored/task_sqa_solver_storm.eval hash: md5 - md5: e7d45e7b258d42bf6ff558cd006704c7 - size: 11864550 + md5: 97f48e62c18b183287cdc4c6865e58cd + size: 12376461 outs: - path: dev_dvc_logs/errors/task_sqa_solver_storm.md hash: md5 @@ -1572,8 +1572,8 @@ stages: size: 63 - path: dev_dvc_logs/scores/task_sqa_solver_storm.md hash: md5 - md5: 0f45e2a63de4bb733c9ccaa0096d2cfd - size: 258 + md5: 75fd2c3700e0909c5f0b6dfe9b0b761b + size: 255 log_any_remaining_errors_and_record_scores@scispace-test: cmd: echo "Collecting errors";[[ "scispace" == */* ]] && mkdir -p test_dvc_logs/errors/task_sqa_solver_$(dirname "scispace"); mkdir -p test_dvc_logs/scores/task_sqa_solver_$(dirname "scispace"); @@ -2042,21 +2042,21 @@ stages: deps: - path: dev_dvc_logs/scored/task_sqa_solver_storm.eval hash: md5 - md5: e7d45e7b258d42bf6ff558cd006704c7 - size: 11864550 + md5: 97f48e62c18b183287cdc4c6865e58cd + size: 12376461 outs: - path: dev_dvc_logs/debug_logs/task_sqa_solver_storm_answer_precision_eval.csv hash: md5 - md5: c0c459c473d06d6ea34639f96c7cbd33 - size: 16555337 + md5: 47b464a14a21d661fd653783bad2660a + size: 16670397 - path: dev_dvc_logs/debug_logs/task_sqa_solver_storm_citation_eval.csv hash: md5 - md5: 330415d5d61ee3a7be3e34715d7b35f5 - size: 111187433 + md5: 429e713b73be994d1800ae31aaca19ce + size: 113172536 - path: dev_dvc_logs/debug_logs/task_sqa_solver_storm_rubric_eval.csv hash: md5 - md5: f44e2ca8fc7ef0f4a0852239526907e7 - size: 14538513 + md5: c88aa94a89a82f4417129e15c1984c9c + size: 15433813 create_nice_logs@fhouse_crow-test: cmd: echo "Creating logs"; [[ "fhouse_crow" == */* ]] && mkdir -p test_dvc_logs/debug_logs/task_sqa_solver_$(dirname "fhouse_crow"); uv run scripts/create_debug_logs.py test_dvc_logs/scored/ task_sqa_solver_fhouse_crow.eval @@ -2400,8 +2400,8 @@ stages: outs: - path: test_dvc_logs/solver_outputs/task_sqa_solver_storm.eval hash: md5 - md5: e9f0f203bcfba80faf017d7545d35ed6 - size: 4914841 + md5: b5dd1f633d5c33253de4969301614a19 + size: 5972462 create_nice_logs@anthropic/claude-sonnet-4-20250514-test: cmd: echo "Creating logs"; [[ "anthropic/claude-sonnet-4-20250514" == */* ]] && mkdir -p test_dvc_logs/debug_logs/task_sqa_solver_$(dirname "anthropic/claude-sonnet-4-20250514"); @@ -2551,13 +2551,13 @@ stages: deps: - path: test_dvc_logs/solver_outputs/task_sqa_solver_storm.eval hash: md5 - md5: e9f0f203bcfba80faf017d7545d35ed6 - size: 4914841 + md5: b5dd1f633d5c33253de4969301614a19 + size: 5972462 outs: - path: test_dvc_logs/model_responses/task_sqa_solver_storm_responses.csv hash: md5 - md5: 36f7918f26cea5397c9ef0ba7d183022 - size: 12043391 + md5: f654209834c01edafe19a8508f651ba2 + size: 12383893 extract_model_responses@sqa_claude-4.0-dev: cmd: echo "Extracting responses"; [[ "sqa_claude-4.0" == */* ]] && mkdir -p dev_dvc_logs/model_responses/task_sqa_solver_$(dirname "sqa_claude-4.0"); uv run scripts/extract_model_responses.py dev_dvc_logs/solver_outputs/task_sqa_solver_sqa_claude-4.0.eval @@ -2722,8 +2722,8 @@ stages: deps: - path: test_dvc_logs/scored/task_sqa_solver_storm.eval hash: md5 - md5: 623b39e2eefa43e4d80431478bb203d2 - size: 11677399 + md5: e3ff9965c9b0b5b16aecd9562322b064 + size: 15494227 outs: - path: test_dvc_logs/errors/task_sqa_solver_storm.md hash: md5 @@ -2731,7 +2731,7 @@ stages: size: 63 - path: test_dvc_logs/scores/task_sqa_solver_storm.md hash: md5 - md5: 694b9ca1ddf5101c219761bf3d462fdf + md5: 6639f2d3170b15cb08a9d49998b8d4d3 size: 255 create_nice_logs@storm-test: cmd: echo "Creating logs"; [[ "storm" == */* ]] && mkdir -p test_dvc_logs/debug_logs/task_sqa_solver_$(dirname @@ -2740,21 +2740,21 @@ stages: deps: - path: test_dvc_logs/scored/task_sqa_solver_storm.eval hash: md5 - md5: 623b39e2eefa43e4d80431478bb203d2 - size: 11677399 + md5: e3ff9965c9b0b5b16aecd9562322b064 + size: 15494227 outs: - path: test_dvc_logs/debug_logs/task_sqa_solver_storm_answer_precision_eval.csv hash: md5 - md5: ab52f71f30a9d2a597ebb34f88262d79 - size: 16080979 + md5: 9bfe7de194fed56642a0a642e0b26b8f + size: 16456085 - path: test_dvc_logs/debug_logs/task_sqa_solver_storm_citation_eval.csv hash: md5 - md5: d11a191fbad1750210a95913616e52b0 - size: 114307815 + md5: 23ace6f751c85838448516a1aa4b5c9a + size: 118676192 - path: test_dvc_logs/debug_logs/task_sqa_solver_storm_rubric_eval.csv hash: md5 - md5: 7f5ce9d87cebc8b11ed539f793ee9ea8 - size: 14245561 + md5: d17a5e5dd8873bb5c82b982a6e9940b9 + size: 17032728 score_all_solvers@model12-test: cmd: echo "Scoring";[[ "scispace" == */* ]] && mkdir -p test_dvc_logs/scored/task_sqa_solver_scispace; cp test_dvc_logs/solver_outputs/task_sqa_solver_scispace.eval test_dvc_logs/scored/task_sqa_solver_scispace.eval; @@ -2797,21 +2797,21 @@ stages: cmd: echo "Scoring";[[ "storm" == */* ]] && mkdir -p test_dvc_logs/scored/task_sqa_solver_storm; cp test_dvc_logs/solver_outputs/task_sqa_solver_storm.eval test_dvc_logs/scored/task_sqa_solver_storm.eval; uv run inspect score --overwrite --display plain --scorer astabench/evals/sqa/task.py@score_all - -S scorer_model=google/gemini-2.5-pro -S is_retrieverless=false test_dvc_logs/scored/task_sqa_solver_storm.eval + -S scorer_model=google/gemini-2.5-flash -S is_retrieverless=false test_dvc_logs/scored/task_sqa_solver_storm.eval deps: - path: test_dvc_logs/solver_outputs/task_sqa_solver_storm.eval hash: md5 - md5: e9f0f203bcfba80faf017d7545d35ed6 - size: 4914841 + md5: b5dd1f633d5c33253de4969301614a19 + size: 5972462 params: params.yaml: - scorer_model: google/gemini-2.5-pro + scorer_model: google/gemini-2.5-flash sqa_scorer_version: may-23-2025 outs: - path: test_dvc_logs/scored/task_sqa_solver_storm.eval hash: md5 - md5: 623b39e2eefa43e4d80431478bb203d2 - size: 11677399 + md5: e3ff9965c9b0b5b16aecd9562322b064 + size: 15494227 score_all_solvers@model13-test: cmd: echo "Scoring";[[ "fhouse_crow" == */* ]] && mkdir -p test_dvc_logs/scored/task_sqa_solver_fhouse_crow; cp test_dvc_logs/solver_outputs/task_sqa_solver_fhouse_crow.eval test_dvc_logs/scored/task_sqa_solver_fhouse_crow.eval; @@ -3472,7 +3472,7 @@ stages: cmd: echo "Scoring";[[ "storm" == */* ]] && mkdir -p dev_dvc_logs/scored/task_sqa_solver_storm; cp dev_dvc_logs/solver_outputs/task_sqa_solver_storm.eval dev_dvc_logs/scored/task_sqa_solver_storm.eval; uv run inspect score --overwrite --display plain --scorer astabench/evals/sqa/task.py@score_all - -S scorer_model=google/gemini-2.5-pro -S is_retrieverless=false dev_dvc_logs/scored/task_sqa_solver_storm.eval + -S scorer_model=google/gemini-2.5-flash -S is_retrieverless=false dev_dvc_logs/scored/task_sqa_solver_storm.eval deps: - path: dev_dvc_logs/solver_outputs/task_sqa_solver_storm.eval hash: md5 @@ -3480,13 +3480,13 @@ stages: size: 4996230 params: params.yaml: - scorer_model: google/gemini-2.5-pro + scorer_model: google/gemini-2.5-flash sqa_scorer_version: may-23-2025 outs: - path: dev_dvc_logs/scored/task_sqa_solver_storm.eval hash: md5 - md5: e7d45e7b258d42bf6ff558cd006704c7 - size: 11864550 + md5: 97f48e62c18b183287cdc4c6865e58cd + size: 12376461 score_all_solvers@model17-dev: cmd: echo "Scoring";[[ "perplexity_dr" == */* ]] && mkdir -p dev_dvc_logs/scored/task_sqa_solver_perplexity_dr; cp dev_dvc_logs/solver_outputs/task_sqa_solver_perplexity_dr.eval dev_dvc_logs/scored/task_sqa_solver_perplexity_dr.eval; @@ -3686,3 +3686,36 @@ stages: hash: md5 md5: 415d07f1989b99afd7298c761f9438fa size: 255 + run_litqa2_you: + cmd: INSPECT_EVAL_LOG_FILE_PATTERN=task_litqa2_solver_you uv run inspect eval + astabench/evals/labbench/litqa2/task.py@litqa2 --display plain --log-dir litqa2_dvc_logs/ + --solver astabench/solvers/youcom.py@youcom_solver -S api_type='research' --no-fail-on-error + --limit=1; mv "$(ls -t litqa2_dvc_logs/*task_litqa2_solver_you.eval 2>/dev/null + | head -n1)" "litqa2_dvc_logs/task_litqa2_solver_you.eval" + outs: + - path: litqa2_dvc_logs/task_litqa2_solver_you.eval + hash: md5 + md5: a5a1bebd61bce9b35751dccfa2c52a3c + size: 5358 + run_litqa2_perplexity: + cmd: INSPECT_EVAL_LOG_FILE_PATTERN=task_litqa2_solver_perplexity uv run inspect + eval astabench/evals/labbench/litqa2/task.py@litqa2 --display plain --log-dir + litqa2_dvc_logs/ --model 'perplexity/sonar-deep-research' --solver astabench/solvers/sqa/perplexity_base.py@perplexity_solver + --no-fail-on-error -T split=test --limit=1; mv "$(ls -t litqa2_dvc_logs/*task_litqa2_solver_perplexity.eval + 2>/dev/null | head -n1)" "litqa2_dvc_logs/task_litqa2_solver_perplexity.eval" + outs: + - path: litqa2_dvc_logs/task_litqa2_solver_perplexity.eval + hash: md5 + md5: 33ecb5e0bc723947ab12c0f92adfb9f9 + size: 9180 + run_litqa2_storm: + cmd: INSPECT_EVAL_LOG_FILE_PATTERN=task_litqa2_solver_storm uv run --extra storm + --python 3.11 inspect eval astabench/evals/labbench/litqa2/task.py@litqa2 --display + plain --log-dir litqa2_dvc_logs/ --solver astabench/solvers/sqa/storm_solver.py@storm_solver + --no-fail-on-error --limit=1; mv "$(ls -t litqa2_dvc_logs/*task_litqa2_solver_storm.eval + 2>/dev/null | head -n1)" "litqa2_dvc_logs/task_litqa2_solver_storm.eval" + outs: + - path: litqa2_dvc_logs/task_litqa2_solver_storm.eval + hash: md5 + md5: 2f0a063d8d4dffc382221751aa3be554 + size: 43306 diff --git a/dvc.yaml b/dvc.yaml index 1cbc85f6..95c54e0a 100644 --- a/dvc.yaml +++ b/dvc.yaml @@ -11,6 +11,35 @@ vars: suffix: "" stages: + # - perplexity + # - you + run_litqa2_you: + cmd: + INSPECT_EVAL_LOG_FILE_PATTERN=task_litqa2_solver_you + uv run --extra litqa2 --extra inspect_evals + inspect eval astabench/evals/labbench/litqa2/task.py@litqa2 + --display plain + --log-dir litqa2_dvc_logs/ + --solver astabench/solvers/youcom.py@youcom_solver -S api_type='research' + --no-fail-on-error + mv "$(ls -t litqa2_dvc_logs/*task_litqa2_solver_you.eval 2>/dev/null | head -n1)" "litqa2_dvc_logs/task_litqa2_solver_you.eval" + outs: + - litqa2_dvc_logs/task_litqa2_solver_you.eval + + run_litqa2_perplexity: + cmd: + INSPECT_EVAL_LOG_FILE_PATTERN=task_litqa2_solver_perplexity + uv run --extra litqa2 --extra inspect_evals + inspect eval astabench/evals/labbench/litqa2/task.py@litqa2 + --display plain + --log-dir litqa2_dvc_logs/ + --model 'perplexity/sonar-deep-research' + --solver astabench/solvers/sqa/perplexity_base.py@perplexity_solver + --no-fail-on-error + mv "$(ls -t litqa2_dvc_logs/*task_litqa2_solver_perplexity.eval 2>/dev/null | head -n1)" "litqa2_dvc_logs/task_litqa2_solver_perplexity.eval" + outs: + - litqa2_dvc_logs/task_litqa2_solver_perplexity.eval + solve_sqa: matrix: model: diff --git a/litqa2_dvc_logs/.gitignore b/litqa2_dvc_logs/.gitignore new file mode 100644 index 00000000..af0e9704 --- /dev/null +++ b/litqa2_dvc_logs/.gitignore @@ -0,0 +1,11 @@ +/task_litqa2_solver_you.eval +/task_litqa2_solver_perplexity.eval +/task_litqa2_solver_storm.eval +/2025-07-10T23-29-38+00-00_task_litqa2_solver_you.eval +/2025-07-10T23-31-08+00-00_task_litqa2_solver_you.eval +/2025-07-10T23-44-44+00-00_task_litqa2_solver_you.eval +/2025-07-10T23-57-48+00-00_task_litqa2_solver_you.eval +/2025-07-11T00-08-04+00-00_task_litqa2_solver_perplexity.eval +/2025-07-11T00-12-06+00-00_task_litqa2_solver_perplexity.eval +/2025-07-11T00-32-41+00-00_task_litqa2_solver_perplexity.eval +/2025-07-11T00-50-41+00-00_task_litqa2_solver_perplexity.eval diff --git a/litqa2_dvc_logs/2025-07-10T23-29-38+00-00_task_litqa2_solver_you.eval.dvc b/litqa2_dvc_logs/2025-07-10T23-29-38+00-00_task_litqa2_solver_you.eval.dvc new file mode 100644 index 00000000..499ba580 --- /dev/null +++ b/litqa2_dvc_logs/2025-07-10T23-29-38+00-00_task_litqa2_solver_you.eval.dvc @@ -0,0 +1,5 @@ +outs: +- md5: 5421087894711b618a9ecd7e47e51220 + size: 178009 + hash: md5 + path: 2025-07-10T23-29-38+00-00_task_litqa2_solver_you.eval diff --git a/litqa2_dvc_logs/2025-07-10T23-31-08+00-00_task_litqa2_solver_you.eval.dvc b/litqa2_dvc_logs/2025-07-10T23-31-08+00-00_task_litqa2_solver_you.eval.dvc new file mode 100644 index 00000000..340c675c --- /dev/null +++ b/litqa2_dvc_logs/2025-07-10T23-31-08+00-00_task_litqa2_solver_you.eval.dvc @@ -0,0 +1,5 @@ +outs: +- md5: 561e2784b97229f9fb6776037f36e677 + size: 3905491 + hash: md5 + path: 2025-07-10T23-31-08+00-00_task_litqa2_solver_you.eval diff --git a/litqa2_dvc_logs/2025-07-10T23-44-44+00-00_task_litqa2_solver_you.eval.dvc b/litqa2_dvc_logs/2025-07-10T23-44-44+00-00_task_litqa2_solver_you.eval.dvc new file mode 100644 index 00000000..9bfc091a --- /dev/null +++ b/litqa2_dvc_logs/2025-07-10T23-44-44+00-00_task_litqa2_solver_you.eval.dvc @@ -0,0 +1,5 @@ +outs: +- md5: fa218239ebec00a7f13d1f04c4c7a0ea + size: 3715517 + hash: md5 + path: 2025-07-10T23-44-44+00-00_task_litqa2_solver_you.eval diff --git a/litqa2_dvc_logs/2025-07-10T23-57-48+00-00_task_litqa2_solver_you.eval.dvc b/litqa2_dvc_logs/2025-07-10T23-57-48+00-00_task_litqa2_solver_you.eval.dvc new file mode 100644 index 00000000..e2a3ca43 --- /dev/null +++ b/litqa2_dvc_logs/2025-07-10T23-57-48+00-00_task_litqa2_solver_you.eval.dvc @@ -0,0 +1,5 @@ +outs: +- md5: b7c2dbdca4b47416656609ef1cb402c5 + size: 26888 + hash: md5 + path: 2025-07-10T23-57-48+00-00_task_litqa2_solver_you.eval diff --git a/litqa2_dvc_logs/2025-07-11T00-08-04+00-00_task_litqa2_solver_perplexity.eval.dvc b/litqa2_dvc_logs/2025-07-11T00-08-04+00-00_task_litqa2_solver_perplexity.eval.dvc new file mode 100644 index 00000000..44c2cd0d --- /dev/null +++ b/litqa2_dvc_logs/2025-07-11T00-08-04+00-00_task_litqa2_solver_perplexity.eval.dvc @@ -0,0 +1,5 @@ +outs: +- md5: e22344cc100746418bf07dc3ba7d3850 + size: 492140 + hash: md5 + path: 2025-07-11T00-08-04+00-00_task_litqa2_solver_perplexity.eval diff --git a/litqa2_dvc_logs/2025-07-11T00-12-06+00-00_task_litqa2_solver_perplexity.eval.dvc b/litqa2_dvc_logs/2025-07-11T00-12-06+00-00_task_litqa2_solver_perplexity.eval.dvc new file mode 100644 index 00000000..85fa5202 --- /dev/null +++ b/litqa2_dvc_logs/2025-07-11T00-12-06+00-00_task_litqa2_solver_perplexity.eval.dvc @@ -0,0 +1,5 @@ +outs: +- md5: 1f345a57a81e815d4acde69804c08299 + size: 5516493 + hash: md5 + path: 2025-07-11T00-12-06+00-00_task_litqa2_solver_perplexity.eval diff --git a/litqa2_dvc_logs/2025-07-11T00-32-41+00-00_task_litqa2_solver_perplexity.eval.dvc b/litqa2_dvc_logs/2025-07-11T00-32-41+00-00_task_litqa2_solver_perplexity.eval.dvc new file mode 100644 index 00000000..75adff18 --- /dev/null +++ b/litqa2_dvc_logs/2025-07-11T00-32-41+00-00_task_litqa2_solver_perplexity.eval.dvc @@ -0,0 +1,5 @@ +outs: +- md5: c7584f690f8f226b48c2335c22dab510 + size: 5554749 + hash: md5 + path: 2025-07-11T00-32-41+00-00_task_litqa2_solver_perplexity.eval diff --git a/litqa2_dvc_logs/2025-07-11T00-50-41+00-00_task_litqa2_solver_perplexity.eval.dvc b/litqa2_dvc_logs/2025-07-11T00-50-41+00-00_task_litqa2_solver_perplexity.eval.dvc new file mode 100644 index 00000000..b8f6acfd --- /dev/null +++ b/litqa2_dvc_logs/2025-07-11T00-50-41+00-00_task_litqa2_solver_perplexity.eval.dvc @@ -0,0 +1,5 @@ +outs: +- md5: b7c8aae966d8d66f95f147e1650fa441 + size: 3420916 + hash: md5 + path: 2025-07-11T00-50-41+00-00_task_litqa2_solver_perplexity.eval diff --git a/litqa2_dvc_logs/task_litqa2_solver_storm.eval.dvc b/litqa2_dvc_logs/task_litqa2_solver_storm.eval.dvc new file mode 100644 index 00000000..caeeeee2 --- /dev/null +++ b/litqa2_dvc_logs/task_litqa2_solver_storm.eval.dvc @@ -0,0 +1,5 @@ +outs: +- md5: 2f0a063d8d4dffc382221751aa3be554 + size: 43306 + hash: md5 + path: task_litqa2_solver_storm.eval diff --git a/mylitqaruns.dvc b/mylitqaruns.dvc new file mode 100644 index 00000000..87a1d16b --- /dev/null +++ b/mylitqaruns.dvc @@ -0,0 +1,6 @@ +outs: +- md5: 01bc3be3ff112a6925df3e783eaa82f3.dir + size: 185154 + nfiles: 2 + hash: md5 + path: mylitqaruns diff --git a/params.yaml b/params.yaml index 3c623b25..43de8606 100644 --- a/params.yaml +++ b/params.yaml @@ -1,4 +1,4 @@ -scorer_model: "google/gemini-2.5-pro" +scorer_model: "google/gemini-2.5-flash" sqa_scorer_version: 'may-23-2025' sqa_solver_version: 'may-23-2025' limit: 1000 diff --git a/perplitqa.dvc b/perplitqa.dvc new file mode 100644 index 00000000..c7eb00e0 --- /dev/null +++ b/perplitqa.dvc @@ -0,0 +1,6 @@ +outs: +- md5: 60e514efd32c3c49b582d67fd727e193.dir + size: 527892 + nfiles: 2 + hash: md5 + path: perplitqa diff --git a/pyproject.toml b/pyproject.toml index ffc2e220..602df9cd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -92,7 +92,6 @@ include = [ [tool.uv] conflicts = [ [{extra = "inspect_evals"}, {extra = "sqa"}], - [{extra = "inspect_evals"}, {extra = "litqa2"}], [{extra = "inspect_evals"}, {extra = "storm"}], [{extra = "sqa"}, {extra = "litqa2"}], [{extra = "sqa"}, {extra = "storm"}],