diff --git a/.ci/scripts/run-docs b/.ci/scripts/run-docs
index 871814593..6f5ee46c7 100755
--- a/.ci/scripts/run-docs
+++ b/.ci/scripts/run-docs
@@ -75,9 +75,6 @@ if [ "$1" == "advanced" ]; then
 fi
 
 if [ "$1" == "evaluation" ]; then
-
-  exit 0
-
   echo "::group::Create script to run evaluation"
   python3 torchchat/utils/scripts/updown.py --file torchchat/utils/docs/evaluation.md --replace 'llama3:stories15M,-l 3:-l 2' --suppress huggingface-cli,HF_TOKEN > ./run-evaluation.sh
   # for good measure, if something happened to updown processor,
diff --git a/torchchat/utils/docs/evaluation.md b/torchchat/utils/docs/evaluation.md
index 490500223..8bc995ca7 100644
--- a/torchchat/utils/docs/evaluation.md
+++ b/torchchat/utils/docs/evaluation.md
@@ -4,8 +4,13 @@
 
 # Evaluation Features
 
+
 Torchchat provides evaluation functionality for your language
 model on a variety of tasks using the
@@ -14,7 +19,7 @@ library.
 
 ## Usage
 
-The evaluation mode of `torchchat.py` script can be used to evaluate your language model on various tasks available in the `lm_eval` library such as "wikitext". You can specify the task(s) you want to evaluate using the `--tasks` option, and limit the evaluation using the `--limit` option. If no task is specified, it will default to evaluating on "wikitext".
+The evaluation mode of `torchchat.py` script can be used to evaluate your language model on various tasks available in the `lm_eval` library such as "wikitext". You can specify the task(s) you want to evaluate using the `--tasks` option, and limit the evaluation using the `--limit` option. If no task is specified, the task will default to evaluating on "wikitext".
 
 **Examples**
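
To verify the re-enabled CI step by hand, the generated evaluation script can be rebuilt locally. The sketch below assumes the repository root as the working directory: the `updown.py` command is copied verbatim from the run-docs hunk above, while executing the result with `bash` is an assumption about how CI consumes the generated file.

```bash
# Regenerate run-evaluation.sh exactly as the re-enabled CI step does
# (command copied verbatim from .ci/scripts/run-docs above).
python3 torchchat/utils/scripts/updown.py \
  --file torchchat/utils/docs/evaluation.md \
  --replace 'llama3:stories15M,-l 3:-l 2' \
  --suppress huggingface-cli,HF_TOKEN > ./run-evaluation.sh

# Inspect what the updown processor extracted before executing anything.
cat ./run-evaluation.sh

# Assumption: the generated file is a plain shell script, as run-docs treats it.
bash ./run-evaluation.sh
```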
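For context, the usage paragraph touched in evaluation.md corresponds to an invocation along the following lines. This is illustrative only: the `eval` mode and the `--tasks`/`--limit` options come from the prose in the diff, but the model alias and the option values are assumptions (the `stories15M` name mirrors the substitution run-docs applies for CI), not commands taken from this patch.

```bash
# Illustrative sketch: evaluate a small model on the default "wikitext" task,
# capping the number of evaluated samples with --limit. Model alias and
# values are assumptions, not part of this diff.
python3 torchchat.py eval stories15M --tasks wikitext --limit 10
```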