Skip to content

Commit 039d526

Browse files
Deprecate unused dataset_formatting module (#4242)
Co-authored-by: behroozazarkhalili <ermiaazarkhalili> Co-authored-by: Quentin Gallouédec <[email protected]>
1 parent bcd059a commit 039d526

File tree

2 files changed

+34
-0
lines changed

2 files changed

+34
-0
lines changed

tests/test_dataset_formatting.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
from typing import Callable
1616

17+
import pytest
1718
from datasets import Dataset, load_dataset
1819
from transformers import AutoModelForCausalLM, AutoModelForSequenceClassification, AutoTokenizer
1920

@@ -23,6 +24,7 @@
2324
from .testing_utils import TrlTestCase
2425

2526

27+
@pytest.mark.filterwarnings("ignore::FutureWarning")
2628
class TestDatasetFormatting(TrlTestCase):
2729
def setup_method(self):
2830
self.llama_tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-MistralForCausalLM-0.1")

trl/extras/dataset_formatting.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
# limitations under the License.
1414

1515
import logging
16+
import warnings
1617
from typing import Callable, Literal, Optional
1718

1819
import datasets
@@ -41,7 +42,17 @@ def conversations_formatting_function(
4142
r"""
4243
Return a callable that takes a "messages" dataset and returns a formatted dataset by applying the
4344
tokenizer's chat template to the dataset, together with the schema of the list of functions in the tools list.
45+
46+
.. deprecated:: 0.24.0
47+
`conversations_formatting_function` is deprecated and will be removed in version 0.27.
48+
Please use `tokenizer.apply_chat_template()` directly instead.
4449
"""
50+
warnings.warn(
51+
"`conversations_formatting_function` is deprecated and will be removed in TRL 0.27. "
52+
"Please use `tokenizer.apply_chat_template()` directly instead.",
53+
DeprecationWarning,
54+
stacklevel=2,
55+
)
4556

4657
def format_dataset(examples):
4758
if isinstance(examples[messages_field][0], list):
@@ -61,7 +72,17 @@ def instructions_formatting_function(tokenizer: AutoTokenizer):
6172
r"""
6273
Return a callable that takes an "instructions" dataset and returns a formatted dataset by applying the
6374
tokenizer's chat template to the dataset.
75+
76+
.. deprecated:: 0.24.0
77+
`instructions_formatting_function` is deprecated and will be removed in version 0.27.
78+
Please use `tokenizer.apply_chat_template()` directly instead.
6479
"""
80+
warnings.warn(
81+
"`instructions_formatting_function` is deprecated and will be removed in TRL 0.27. "
82+
"Please use `tokenizer.apply_chat_template()` directly instead.",
83+
DeprecationWarning,
84+
stacklevel=2,
85+
)
6586

6687
def format_dataset(examples):
6788
if isinstance(examples["prompt"], list):
@@ -99,7 +120,18 @@ def get_formatting_func_from_dataset(
99120
100121
Returns:
101122
Callable: Formatting function if the dataset format is supported else None
123+
124+
.. deprecated:: 0.24.0
125+
`get_formatting_func_from_dataset` is deprecated and will be removed in version 0.27.
126+
Please use `tokenizer.apply_chat_template()` directly instead.
102127
"""
128+
warnings.warn(
129+
"`get_formatting_func_from_dataset` is deprecated and will be removed in TRL 0.27. "
130+
"Please use `tokenizer.apply_chat_template()` directly instead.",
131+
DeprecationWarning,
132+
stacklevel=2,
133+
)
134+
103135
if isinstance(dataset, Dataset):
104136
if "messages" in dataset.features:
105137
if dataset.features["messages"] == FORMAT_MAPPING["chatml"]:

0 commit comments

Comments
 (0)