3 files changed: +39 -10 lines

@@ -15,15 +15,20 @@ launcher:
 backend:
   device: cuda
   device_ids: 0
-  no_weights: true
+  no_weights: False
   task: text-generation
-  model: facebook/opt-125m
+  model: openai/gpt-oss-20b
+  torch_dtype: auto
+  device_map: auto

 scenario:
   dataset_name: EnergyStarAI/text_generation
   text_column_name: text
   num_samples: 1000
   truncation: True
+  reasoning: True
+  reasoning_params:
+    reasoning_effort: high

   input_shapes:
     batch_size: 1
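Not part of the diff: a minimal sketch of how the new `reasoning_params` entry is expected to reach the tokenizer. It assumes a `transformers` version whose `openai/gpt-oss-20b` chat template accepts a `reasoning_effort` keyword; the preprocessing change below simply forwards whatever the YAML provides via `**scenario_config.reasoning_params`.

```python
# Sketch only: load the tokenizer named in the config and pass the
# reasoning_params entry as an extra chat-template keyword argument.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("openai/gpt-oss-20b")
encoded = tokenizer.apply_chat_template(
    [{"role": "user", "content": "Summarize the dataset sample here."}],
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    reasoning_effort="high",  # mirrors scenario.reasoning_params in the YAML above
)
print(len(encoded["input_ids"]))  # number of prompt tokens
```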
@@ -292,13 +292,34 @@ def tokenize_function(examples):
             padding=padding,
         )
 
-    dataset = dataset.map(
-        function=tokenize_function,
-        desc="Running tokenizer on dataset",
-        remove_columns=dataset.features,
-        writer_batch_size=50,
-        batched=True,
-    ).with_format("torch")
+    def reasoning_tokenize_function(examples):
+        return pretrained_processor.apply_chat_template(
+            [{"role": "user", "content": examples[scenario_config.text_column_name]}],
+            truncation=scenario_config.truncation,
+            max_length=min(max_length, 2048) - new_tokens,
+            padding=padding,
+            add_generation_prompt=True,
+            enable_thinking=True,
+            tokenize=True,
+            return_dict=True,
+            **scenario_config.reasoning_params,
+        )
+
+    if scenario_config.reasoning:
+        dataset = dataset.map(
+            function=reasoning_tokenize_function,
+            desc="Running reasoning tokenizer on dataset",
+            remove_columns=dataset.features,
+        ).with_format("torch")
+
+    else:
+        dataset = dataset.map(
+            function=tokenize_function,
+            desc="Running tokenizer on dataset",
+            remove_columns=dataset.features,
+            writer_batch_size=50,
+            batched=True,
+        ).with_format("torch")
 
     return dataset
 
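Also not part of the diff: a self-contained sketch of the new reasoning branch on a toy two-row dataset. The names differ from the file above (`tokenizer` stands in for `pretrained_processor`, the toy dataset and literal `reasoning_effort="high"` are hypothetical); the real benchmark maps over the EnergyStarAI dataset using the scenario config.

```python
# Sketch of the scenario_config.reasoning branch: map a per-example chat-template
# tokenizer over a toy dataset (non-batched, matching the new map() call).
from datasets import Dataset
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("openai/gpt-oss-20b")
toy = Dataset.from_dict({"text": ["What is 2 + 2?", "Name three prime numbers."]})

def reasoning_tokenize_function(example):
    # one example per call, since this branch maps without batched=True
    return tokenizer.apply_chat_template(
        [{"role": "user", "content": example["text"]}],
        add_generation_prompt=True,
        tokenize=True,
        return_dict=True,
        reasoning_effort="high",
    )

encoded = toy.map(
    reasoning_tokenize_function,
    remove_columns=toy.features,
    desc="Running reasoning tokenizer on dataset",
).with_format("torch")
print(encoded[0]["input_ids"].shape)  # 1-D tensor of prompt token ids
```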
@@ -34,7 +34,10 @@ class EnergyStarConfig(ScenarioConfig):
     dataset_prefix1: str = field(default="", metadata={"help": "Prefix to add to text2textgeneration input."})
     dataset_prefix2: str = field(default="", metadata={"help": "Prefix to add to text2textgeneration input."})
     t5_task: str = field(default="", metadata={"help": "Task for categorizing text2textgeneration tasks."})
-
+    reasoning: Union[bool, str] = field(default=False, metadata={"help": "To activate reasoning mode."})
+    reasoning_params: Dict[str, Any] = field(
+        default_factory=dict, metadata={"help": "Additional parameters for reasoning model."}
+    )
     # image dataset options
     image_column_name: str = field(default="image", metadata={"help": "Name of the column with the image input."})
     resize: Union[bool, str] = field(default=False, metadata={"help": "To resize the input images."})
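A small, self-contained illustration of the config pattern added here (the dataclass below is a hypothetical stand-in, not the real `EnergyStarConfig`): because `reasoning_params` defaults to an empty dict via `default_factory`, unpacking it with `**` is a no-op unless the YAML supplies keys such as `reasoning_effort`.

```python
from dataclasses import dataclass, field
from typing import Any, Dict, Union

@dataclass
class ReasoningOptions:  # hypothetical stand-in for the new EnergyStarConfig fields
    reasoning: Union[bool, str] = False
    reasoning_params: Dict[str, Any] = field(default_factory=dict)

def render_template(**template_kwargs):
    # stands in for apply_chat_template(..., **scenario_config.reasoning_params)
    return template_kwargs

print(render_template(**ReasoningOptions().reasoning_params))
# {}  -> no extra kwargs when reasoning_params is left at its default
custom = ReasoningOptions(reasoning=True, reasoning_params={"reasoning_effort": "high"})
print(render_template(**custom.reasoning_params))
# {'reasoning_effort': 'high'}
```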