EleutherAI
diff --git a/‎lm_eval/evaluator.py‎
Lines changed: 11 additions & 3 deletions b/‎lm_eval/evaluator.py‎
Lines changed: 11 additions & 3 deletions
diff --git a/‎lm_eval/filters/extraction.py‎
Lines changed: 1 addition & 1 deletion b/‎lm_eval/filters/extraction.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎lm_eval/tasks/evalita_llm/_evalita-mp_ner_adg.yaml‎ renamed to ‎lm_eval/tasks/evalita_llm/_evalita-mp_ner_adg‎ b/‎lm_eval/tasks/evalita_llm/_evalita-mp_ner_adg.yaml‎ renamed to ‎lm_eval/tasks/evalita_llm/_evalita-mp_ner_adg‎
diff --git a/‎lm_eval/tasks/evalita_llm/_evalita-mp_ner_fic.yaml‎ renamed to ‎lm_eval/tasks/evalita_llm/_evalita-mp_ner_fic‎ b/‎lm_eval/tasks/evalita_llm/_evalita-mp_ner_fic.yaml‎ renamed to ‎lm_eval/tasks/evalita_llm/_evalita-mp_ner_fic‎
diff --git a/‎lm_eval/tasks/evalita_llm/_evalita-mp_ner_wn.yaml‎ renamed to ‎lm_eval/tasks/evalita_llm/_evalita-mp_ner_wn‎ b/‎lm_eval/tasks/evalita_llm/_evalita-mp_ner_wn.yaml‎ renamed to ‎lm_eval/tasks/evalita_llm/_evalita-mp_ner_wn‎
diff --git a/‎tests/test_tasks.py‎
Lines changed: 0 additions & 43 deletions b/‎tests/test_tasks.py‎
Lines changed: 0 additions & 43 deletions
diff --git a/‎tests/test_unitxt_tasks.py‎
Lines changed: 49 additions & 0 deletions b/‎tests/test_unitxt_tasks.py‎
Lines changed: 49 additions & 0 deletions
@@ -154,15 +154,23 @@ def simple_evaluate(
             "Either 'limit' or 'samples' must be None, but both are not None."
         )
 
+    _NEEDS_CHAT_TEMPLATE = ("inst", "chat")
     if (
-        (isinstance(model_args, str) and "inst" in model_args.lower())
+        (
+            isinstance(model_args, str)
+            and any(kw in model_args.lower() for kw in _NEEDS_CHAT_TEMPLATE)
+        )
         or (
             isinstance(model_args, dict)
-            and any("inst" in str(v).lower() for v in model_args.values())
+            and any(
+                any(kw in str(v).lower() for kw in _NEEDS_CHAT_TEMPLATE)
+                for v in model_args.values()
+            )
         )
     ) and not apply_chat_template:
         eval_logger.warning(
-            "Model appears to be an instruct variant but chat template is not applied. Recommend setting `apply_chat_template` (optionally `fewshot_as_multiturn`)."
+            "Model appears to be an instruct or chat variant but chat template is not applied. "
+            "Recommend setting `apply_chat_template` (optionally `fewshot_as_multiturn`)."
         )
 
     if delete_requests_cache:
 
@@ -141,7 +141,7 @@ def __init__(
         """
         regex_pattern: The basic regex pattern to use. If fails to match, we will use the customized match procedure
                         - step 1 : We parse the choices between ([A-Z])s then try to find these choices in the response.
-                        - step 2 : We parse the choice with regex :[\s]*([A-?]), where ? varies by number of choices.
+                        - step 2 : We parse the choice with regex: r'\s*([A-?])', where ? varies by number of choices.
         group_select: Selects the (group_select)th match from the findall result.
         ignore_case: Ignores the case during step 1 matching
         ignore_punctuation: Remove the punctuation during step 1 matching
 
@@ -46,7 +46,6 @@ def limit() -> int:
     return 10
 
 
-# Tests
 class BaseTasks:
     """
     Base class for testing tasks
@@ -166,45 +165,3 @@ class TestNewTasksElseDefault(BaseTasks):
     Test class parameterized with a list of new/modified tasks
     (or a set of default tasks if none have been modified)
     """
-
-
-@pytest.mark.parametrize(
-    "task_class",
-    task_class(
-        ["arc_easy_unitxt"], tasks.TaskManager(include_path="./tests/testconfigs")
-    ),
-    ids=lambda x: f"{x.config.task}",
-)
-class TestUnitxtTasks(BaseTasks):
-    """
-    Test class for Unitxt tasks parameterized with a small custom
-    task as described here:
-      https://www.unitxt.ai/en/latest/docs/lm_eval.html
-    """
-
-    def test_check_training_docs(self, task_class: ConfigurableTask):
-        if task_class.has_training_docs():
-            assert task_class.dataset["train"] is not None
-
-    def test_check_validation_docs(self, task_class):
-        if task_class.has_validation_docs():
-            assert task_class.dataset["validation"] is not None
-
-    def test_check_test_docs(self, task_class):
-        task = task_class
-        if task.has_test_docs():
-            assert task.dataset["test"] is not None
-
-    def test_doc_to_text(self, task_class, limit: int):
-        task = task_class
-        arr = (
-            list(islice(task.test_docs(), limit))
-            if task.has_test_docs()
-            else list(islice(task.validation_docs(), limit))
-        )
-        _array = [task.doc_to_text(doc) for doc in arr]
-        if not task.multiple_input:
-            for x in _array:
-                assert isinstance(x, str)
-        else:
-            pass
@@ -0,0 +1,49 @@
+from itertools import islice
+
+import pytest
+
+from lm_eval import tasks as tasks
+from lm_eval.api.task import ConfigurableTask
+from tests.test_tasks import BaseTasks, task_class
+
+
+@pytest.mark.parametrize(
+    "task_class",
+    task_class(
+        ["arc_easy_unitxt"], tasks.TaskManager(include_path="./tests/testconfigs")
+    ),
+    ids=lambda x: f"{x.config.task}",
+)
+class TestUnitxtTasks(BaseTasks):
+    """
+    Test class for Unitxt tasks parameterized with a small custom
+    task as described here:
+      https://www.unitxt.ai/en/latest/docs/lm_eval.html
+    """
+
+    def test_check_training_docs(self, task_class: ConfigurableTask):
+        if task_class.has_training_docs():
+            assert task_class.dataset["train"] is not None
+
+    def test_check_validation_docs(self, task_class):
+        if task_class.has_validation_docs():
+            assert task_class.dataset["validation"] is not None
+
+    def test_check_test_docs(self, task_class):
+        task = task_class
+        if task.has_test_docs():
+            assert task.dataset["test"] is not None
+
+    def test_doc_to_text(self, task_class, limit: int):
+        task = task_class
+        arr = (
+            list(islice(task.test_docs(), limit))
+            if task.has_test_docs()
+            else list(islice(task.validation_docs(), limit))
+        )
+        _array = [task.doc_to_text(doc) for doc in arr]
+        if not task.multiple_input:
+            for x in _array:
+                assert isinstance(x, str)
+        else:
+            pass