Skip to content

Commit 53df859

Browse files
authored
[IFEval] Speed up think tag removal (#792)
* [IFEval] Speed up think tag removal * Refactor and add tests * Add moar tests
1 parent 8260f59 commit 53df859

File tree

3 files changed

+95
-2
lines changed

3 files changed

+95
-2
lines changed

src/lighteval/tasks/extended/ifeval/main.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
2121
# SOFTWARE.
2222

23-
import re
2423

2524
import numpy as np
2625
from aenum import extend_enum
@@ -34,6 +33,7 @@
3433
)
3534
from lighteval.tasks.lighteval_task import LightevalTaskConfig
3635
from lighteval.tasks.requests import Doc
36+
from lighteval.utils.utils import remove_reasoning_tags
3737

3838

3939
# Very specific task where there are no precise outputs but instead we test if the format obeys rules
@@ -55,10 +55,15 @@ def ifeval_prompt(line, task_name: str = None):
5555
"inst_level_loose_acc",
5656
]
5757

58+
REASONING_TAG_PAIRS = [
59+
("<think>", "</think>"),
60+
]
61+
5862

5963
def ifeval_metric(predictions: list[str], formatted_doc: Doc, **kwargs) -> dict:
6064
response = predictions[0]
61-
response = re.sub(r"(<think>)?[\s\S]*?<\/think>", "", response)
65+
# Remove the reasoning block to avoid false negatives: https://github.com/huggingface/lighteval/issues/790
66+
response = remove_reasoning_tags(response, REASONING_TAG_PAIRS)
6267

6368
# Strict instructions
6469
instruction_list = formatted_doc.specific["instructions_id_list"]

src/lighteval/utils/utils.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -229,3 +229,28 @@ def download_dataset_worker(
229229

230230
def safe_divide(numerator: np.ndarray, denominator: float, default_value: float = 0.0) -> np.ndarray:
    """Divide ``numerator`` by ``denominator``, falling back to ``default_value`` on a zero denominator.

    Args:
        numerator: Values to divide.
        denominator: Divisor; wherever it equals zero the result is ``default_value``.
        default_value: Value used in place of the quotient when the denominator is zero.

    Returns:
        The result of ``np.where``: the quotient where the denominator is non-zero,
        ``default_value`` elsewhere.
    """
    # np.where evaluates both branches eagerly, so the quotient is computed even when the
    # denominator is zero. Those entries are discarded below, so silence the divide/invalid
    # warnings they would otherwise emit.
    with np.errstate(divide="ignore", invalid="ignore"):
        quotient = numerator / denominator
    return np.where(denominator != 0, quotient, default_value)
232+
233+
234+
def remove_reasoning_tags(text: str, tag_pairs: list[tuple[str, str]]) -> str:
    """Remove every reasoning block delimited by one of the given tag pairs.

    A block is an opening tag, the first closing tag found after it, and
    everything in between. Pairs are processed in the order given; for each
    pair the leftmost complete block is removed repeatedly until none remains.
    An opening tag with no closing tag after it, or a closing tag with no
    opening tag before it, is left untouched.

    See: https://github.com/huggingface/lighteval/issues/790

    Args:
        text: The text to clean.
        tag_pairs: ``(opening_tag, closing_tag)`` tuples. Pairs in which either
            tag is an empty string are ignored.

    Returns:
        ``text`` with all complete reasoning blocks removed.

    Example:
        >>> text = "<think> Reasoning section </think> Answer section"
        >>> tag_pairs = [("<think>", "</think>")]
        >>> remove_reasoning_tags(text, tag_pairs)
        ' Answer section'
    """
    result = text

    for start_tag, end_tag in tag_pairs:
        if not start_tag or not end_tag:
            # An empty string matches at every position, so it cannot delimit
            # a block (and an empty closing tag would never shrink the text,
            # looping forever). Skip such pairs.
            continue

        while True:
            start = result.find(start_tag)
            if start == -1:
                break
            # Search for the closing tag only *after* the opening tag so that
            # identical or overlapping tags (e.g. ("|", "|")) still enclose
            # the content between them.
            end = result.find(end_tag, start + len(start_tag))
            if end == -1:
                break
            result = result[:start] + result[end + len(end_tag) :]

    return result

tests/utils/test_utils.py

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
# MIT License
2+
3+
# Copyright (c) 2024 The HuggingFace Team
4+
5+
# Permission is hereby granted, free of charge, to any person obtaining a copy
6+
# of this software and associated documentation files (the "Software"), to deal
7+
# in the Software without restriction, including without limitation the rights
8+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
# copies of the Software, and to permit persons to whom the Software is
10+
# furnished to do so, subject to the following conditions:
11+
12+
# The above copyright notice and this permission notice shall be included in all
13+
# copies or substantial portions of the Software.
14+
15+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21+
# SOFTWARE.
22+
23+
import unittest
24+
25+
from lighteval.utils.utils import remove_reasoning_tags
26+
27+
28+
class TestRemoveReasoningTags(unittest.TestCase):
    """Unit tests for ``remove_reasoning_tags`` using the ``<think>`` tag pair."""

    # Tag pair shared by every test case.
    TAG_PAIRS = [("<think>", "</think>")]

    def test_remove_reasoning_tags(self):
        text = "<think> Reasoning section </think> Answer section"
        result = remove_reasoning_tags(text, self.TAG_PAIRS)
        self.assertEqual(result, " Answer section")

    def test_remove_multiple_tags(self):
        text = "<think> Reasoning </think> Interlude <think> More reasoning </think> Answer"
        result = remove_reasoning_tags(text, self.TAG_PAIRS)
        # Only the tagged blocks are removed: the space before and the space
        # after the second block both survive, hence the double space.
        self.assertEqual(result, " Interlude  Answer")

    def test_no_tags(self):
        text = "No reasoning tags here."
        result = remove_reasoning_tags(text, self.TAG_PAIRS)
        self.assertEqual(result, "No reasoning tags here.")

    def test_empty_text(self):
        result = remove_reasoning_tags("", self.TAG_PAIRS)
        self.assertEqual(result, "")

    def test_text_before_opening_tag(self):
        text = "No opening tag <think> Reasoning section. </think> Answer section"
        result = remove_reasoning_tags(text, self.TAG_PAIRS)
        # The spaces on both sides of the removed block are kept.
        self.assertEqual(result, "No opening tag  Answer section")

    def test_no_opening_tag(self):
        # A closing tag without a preceding opening tag is not a complete
        # block, so the text is returned unchanged.
        text = "Reasoning section. </think> Answer section"
        result = remove_reasoning_tags(text, self.TAG_PAIRS)
        self.assertEqual(result, "Reasoning section. </think> Answer section")

    def test_no_closing_tag(self):
        text = "<think> Reasoning section. Answer section"
        result = remove_reasoning_tags(text, self.TAG_PAIRS)
        self.assertEqual(result, "<think> Reasoning section. Answer section")

0 commit comments

Comments
 (0)