diff --git a/.ci/scripts/run-docs b/.ci/scripts/run-docs
index 871814593..6f5ee46c7 100755
--- a/.ci/scripts/run-docs
+++ b/.ci/scripts/run-docs
@@ -75,9 +75,6 @@ if [ "$1" == "advanced" ]; then
 fi
 
 if [ "$1" == "evaluation" ]; then
-
-  exit 0
-
   echo "::group::Create script to run evaluation"
   python3 torchchat/utils/scripts/updown.py --file torchchat/utils/docs/evaluation.md --replace 'llama3:stories15M,-l 3:-l 2' --suppress huggingface-cli,HF_TOKEN > ./run-evaluation.sh
   # for good measure, if something happened to updown processor,
diff --git a/torchchat/utils/docs/evaluation.md b/torchchat/utils/docs/evaluation.md
index 490500223..8bc995ca7 100644
--- a/torchchat/utils/docs/evaluation.md
+++ b/torchchat/utils/docs/evaluation.md
@@ -4,8 +4,13 @@
 
 # Evaluation Features
 
+
 Torchchat provides evaluation functionality for your language
 model on a variety of tasks using the
@@ -14,7 +19,7 @@ library.
 
 ## Usage
 
-The evaluation mode of `torchchat.py` script can be used to evaluate your language model on various tasks available in the `lm_eval` library such as "wikitext". You can specify the task(s) you want to evaluate using the `--tasks` option, and limit the evaluation using the `--limit` option. If no task is specified, it will default to evaluating on "wikitext".
+The evaluation mode of `torchchat.py` script can be used to evaluate your language model on various tasks available in the `lm_eval` library such as "wikitext". You can specify the task(s) you want to evaluate using the `--tasks` option, and limit the evaluation using the `--limit` option. If no task is specified, the task will default to evaluating on "wikitext".
 
 **Examples**
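
To verify the re-enabled CI step by hand, the generated evaluation script can be rebuilt locally. The sketch below assumes the repository root as the working directory: the `updown.py` command is copied verbatim from the run-docs hunk above, while executing the result with `bash` is an assumption about how CI consumes the generated file.

```bash
# Regenerate run-evaluation.sh exactly as the re-enabled CI step does
# (command copied verbatim from .ci/scripts/run-docs above).
python3 torchchat/utils/scripts/updown.py \
  --file torchchat/utils/docs/evaluation.md \
  --replace 'llama3:stories15M,-l 3:-l 2' \
  --suppress huggingface-cli,HF_TOKEN > ./run-evaluation.sh

# Inspect what the updown processor extracted before executing anything.
cat ./run-evaluation.sh

# Assumption: the generated file is a plain shell script, as run-docs treats it.
bash ./run-evaluation.sh
```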
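For context, the usage paragraph touched in evaluation.md corresponds to an invocation along the following lines. This is illustrative only: the `eval` mode and the `--tasks`/`--limit` options come from the prose in the diff, but the model alias and the option values are assumptions (the `stories15M` name mirrors the substitution run-docs applies for CI), not commands taken from this patch.

```bash
# Illustrative sketch: evaluate a small model on the default "wikitext" task,
# capping the number of evaluated samples with --limit. Model alias and
# values are assumptions, not part of this diff.
python3 torchchat.py eval stories15M --tasks wikitext --limit 10
```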