Updated ODM defaults (#648)

romitjain · web-flow · commit 890725c78d84 · 2025-12-03T20:41:22.000+05:30
Signed-off-by: romitjain &lt;romit@ibm.com&gt;
diff --git a/docs/advanced-data-preprocessing.md b/docs/advanced-data-preprocessing.md
@@ -147,11 +147,13 @@ Each data handler has:
 `odm` config has the following fields and is required when `datapreprocessor` `type` is `odm`.
 
 `odm`:
-  `update_interval` (optional, int, defaults to `1`): Multi-Armed Bandit (MAB) is used to learn from the training signals and then provide mixing probabilities across datasets. `update_interval` defines the frequency of updating the MAB with training signals in terms of step count.
-  `sampling_interval` (optional, int, defaults to `1`): Defines the frequency of choosing a dataset to sample from through MAB. The value is provided in terms of sample count.
-  `reward_type` (optional, str, defaults to `entropy`): Type of reward to be used to update MAB. Currently supported rewards are `train_loss`, `validation_loss`, `entropy`, `entropy3_varent1`, `entropy_last_token`, `gradnorm`. More details can be found [here](https://github.com/foundation-model-stack/fms-acceleration/tree/main/plugins/online-data-mixing#rewards).
-  `gamma` (optional, int, defaults to `0.1`): MAB hyper-parameter which is similar to exploration factor.
-  `eta` (optional, int, defaults to `0.1`): MAB hyper-parameter which is similar to learning rate.
+- `update_interval` (optional, int, defaults to `None`): Multi-Armed Bandit (MAB) is used to learn from the training signals and then provide mixing probabilities across datasets. `update_interval` defines the frequency of updating the MAB with training signals in terms of step count. If not provided, it defaults to `eval_steps`
+- `sampling_interval` (optional, int, defaults to `1`): Defines the frequency of choosing a dataset to sample from through MAB. The value is provided in terms of sample count.
+- `reward_type` (optional, str, defaults to `entropy`): Type of reward to be used to update MAB. Currently supported rewards are `train_loss`, `validation_loss`, `entropy`, `entropy3_varent1`, `entropy_last_token`, `gradnorm`. More details can be found [here](https://github.com/foundation-model-stack/fms-acceleration/tree/main/plugins/online-data-mixing#rewards).
+- `gamma` (optional, int, defaults to `0.1`): MAB hyper-parameter which is similar to exploration factor.
+- `eta` (optional, int, defaults to `0.3`): MAB hyper-parameter which is similar to learning rate.
+- `auto_categorize_input_column` (optional, str, defaults to `None`): If only a single dataset is provided, this field is required to determin the column name which should be used to categorize the data into psuedo categories
+- `auto_categorize_num_categories` (optional, int, defaults to `None`): Used in conjunction with the above field, this field specifies the number of psuedo categories to be assigned in the dataset
 
 `datasets` (list):
   - `name` (optional, str): A unique identifier for the dataset.
diff --git a/tuning/config/acceleration_configs/odm.py b/tuning/config/acceleration_configs/odm.py
@@ -24,10 +24,10 @@
 @dataclass
 class ODM:
     update_interval: int = None
-    sampling_interval: int = None
-    reward_type: str = None
+    sampling_interval: int = 1
+    reward_type: str = "entropy"
     gamma: float = 0.1
-    eta: float = 0.1
+    eta: float = 0.3
     resume_from_checkpoint: Union[bool, str] = False
     auto_categorize_input_column: str = None
     auto_categorize_num_categories: Optional[int] = None
diff --git a/tuning/data/setup_dataprocessor.py b/tuning/data/setup_dataprocessor.py
@@ -549,7 +549,7 @@ def setup_train_dataset_for_odm(
         )
 
     auto_categorize_config = {}
-    if hasattr(odm_config.odm, "auto_categorize_input_column"):
+    if odm_config.odm.auto_categorize_input_column:
         auto_categorize_config = {
             "input_column": "input_ids",
             "num_categories": int(odm_config.odm.auto_categorize_num_categories),
diff --git a/tuning/sft_trainer.py b/tuning/sft_trainer.py
@@ -160,6 +160,9 @@ def train(
                 "resume_from_checkpoint"
             ] = resume_from_checkpoint
             odm_config = ODMConfig(odm=ODM(**_dataconfig.dataprocessor.odm))
+            odm_config.odm.update_interval = (
+                odm_config.odm.update_interval or train_args.eval_steps
+            )
 
     # Validate parameters
     if (not isinstance(model_args.model_name_or_path, str)) or (

Original file line number	Diff line number	Diff line change
`@@ -549,7 +549,7 @@ def setup_train_dataset_for_odm(`
`549`	`549`	`)`
`550`	`550`
`551`	`551`	`auto_categorize_config = {}`
`552`		`- if hasattr(odm_config.odm, "auto_categorize_input_column"):`
	`552`	`+ if odm_config.odm.auto_categorize_input_column:`
`553`	`553`	`auto_categorize_config = {`
`554`	`554`	`"input_column": "input_ids",`
`555`	`555`	`"num_categories": int(odm_config.odm.auto_categorize_num_categories),`