diff --git a/brainstorming/configs/baseline.yaml b/brainstorming/configs/baseline.yaml
new file mode 100644
index 000000000..632dbc500
--- /dev/null
+++ b/brainstorming/configs/baseline.yaml
@@ -0,0 +1,33 @@
+# baseline.yaml shows 5 key config patterns
+
+output_dir: /tmp/torchtune/llama3_2_1B/full
+
+# PATTERN 1: Simple Component Instantiation
+tokenizer:
+  _target_: mock.llama3_tokenizer
+  path: /tmp/Llama-3.2-1B-Instruct/original/tokenizer.model
+
+# PATTERN 2: Component with Nested Instantiation
+model:
+  _target_: mock.llama3_2_1b
+  # Nested component: attention config
+  attn_config:
+    _target_: mock.MultiHeadAttention
+    num_heads: 32
+
+# PATTERN 3: Component Needing Runtime Args (Partial)
+optimizer:
+  _target_: torch.optim.AdamW
+  lr: 2e-5
+  _partial_: true
+  # params: None  # will be passed at instantiation time (not known now)
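+  # Note: with _partial_: true, hydra.utils.instantiate returns a functools.partial
+  # that is later called with the runtime-only arguments (see config_hydra.py).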
+
+# PATTERN 4: Non-Instantiated Config Block (Plain Data)
+data_args:
+  batch_size: 4
+  shuffle: True
+
+# PATTERN 5: Plain Top-Level Hyperparameters
+# Training params
+epochs: 1
+gradient_accumulation_steps: 8
diff --git a/brainstorming/configs/config_dataclasses.py b/brainstorming/configs/config_dataclasses.py
new file mode 100644
index 000000000..369d77a2b
--- /dev/null
+++ b/brainstorming/configs/config_dataclasses.py
@@ -0,0 +1,131 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""
+Dataclass config with inner Config classes.
+
+Pros:
+- Type safety for instantiated configs
+
+Cons:
+- Requires modifying target classes (not feasible for external libraries; those need a generic wrapper such as ComponentConfig)
+- Boilerplate (every class needs Config + __init__)
+
+e.g.:
+
+```
+class TokenizerWithConfig:
+    @dataclass
+    class Config:
+        path: str
+
+        def build(self) -> "TokenizerWithConfig":
+            return TokenizerWithConfig(self)
+
+    def __init__(self, config: Config):
+        self.config = config
+        self.path = config.path
+```
+"""
+
+from dataclasses import dataclass
+
+import torch
+
+from mock_with_config import (
+    ComponentConfig,
+    LlamaModelWithConfig,
+    MultiHeadAttentionWithConfig,
+    TokenizerWithConfig,
+)
+
+
+@dataclass
+class DataArgs:
+    """Plain dataclass for non-instantiated config block (PATTERN 4)."""
+
+    batch_size: int = 4
+    shuffle: bool = True
+
+
+def llama3_2_1b_full():
+    output_dir = "/tmp/torchtune/llama3_2_1B/full"
+
+    return {
+        "output_dir": output_dir,
+        # PATTERN 1: Simple Component Instantiation
+        "tokenizer": TokenizerWithConfig.Config(
+            path="/tmp/Llama-3.2-1B-Instruct/original/tokenizer.model",
+        ),
+        # PATTERN 2: Component with Nested Instantiation
+        "model": LlamaModelWithConfig.Config(
+            attn_config=MultiHeadAttentionWithConfig.Config(
+                num_heads=32,
+            )
+        ),
+        # PATTERN 3: Component Needing Runtime Args (Partial)
+        "optimizer": ComponentConfig(
+            component_cls=torch.optim.AdamW,
+            kwargs={"lr": 2e-5},
+        ),
+        # PATTERN 4: Non-Instantiated Config Block (Plain Data)
+        "data_args": DataArgs(
+            batch_size=4,
+            shuffle=True,
+        ),
+        # PATTERN 5: Plain Top-Level Hyperparameters
+        "epochs": 1,
+        "gradient_accumulation_steps": 8,
+    }
+
+
+if __name__ == "__main__":
+    # =========================================================================
+    # Scenario 1: Basic Instantiation
+    # =========================================================================
+    cfg = llama3_2_1b_full()
+
+    # PATTERN 1: Simple Component Instantiation
+    tokenizer = cfg["tokenizer"].build()
+
+    # PATTERN 2: Component with Nested Instantiation
+    model = cfg["model"].build()
+
+    # PATTERN 3: Component Needing Runtime Args (Partial)
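+    # ComponentConfig.build() forwards runtime positional args (here the model
+    # params) to the target class, merged with the kwargs stored in the config.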
+    optimizer = cfg["optimizer"].build(model.parameters())
+
+    # =========================================================================
+    # Scenario 2: Override Config Values
+    # =========================================================================
+    cfg2 = llama3_2_1b_full()
+
+    # PATTERN 1: Simple Component Instantiation
+    cfg2["tokenizer"].path = "/new/path"
+
+    # PATTERN 2: Component with Nested Instantiation
+    cfg2["model"].attn_config.num_heads = 64
+
+    # PATTERN 3: Component Needing Runtime Args (Partial)
+    cfg2["optimizer"].kwargs["lr"] = 1e-4
+
+    model2 = cfg2["model"].build()
+    optimizer2 = cfg2["optimizer"].build(model2.parameters())
+
+    # =========================================================================
+    # Scenario 3: Config Composition
+    # =========================================================================
+    def llama3_2_1b_large_lr():
+        """Variant with larger learning rate and different model config."""
+        base = llama3_2_1b_full()
+        # Overrides
+        base["optimizer"].kwargs["lr"] = 1e-3
+        base["model"].attn_config.num_heads = 64
+        return base
+
+    cfg_variant = llama3_2_1b_large_lr()
+    model_variant = cfg_variant["model"].build()
+    optimizer_variant = cfg_variant["optimizer"].build(model_variant.parameters())
+    assert optimizer_variant.param_groups[0]["lr"] == 1e-3
diff --git a/brainstorming/configs/config_dicts.py b/brainstorming/configs/config_dicts.py
new file mode 100644
index 000000000..80cb987c0
--- /dev/null
+++ b/brainstorming/configs/config_dicts.py
@@ -0,0 +1,130 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""
+Config using Plain Python Dicts.
+
+Pros:
+- Extremely simple
+- No dependencies
+- Easy to understand
+- Flexible
+
+Cons:
+- No type hints (cfg["batch_szie"] typo won't be caught)
+- No validation (cfg["batch_size"] = "invalid" won't error)
+- Very loose, users can pass anything
+"""
+
+import torch.optim
+
+from mock import llama3_2_1b, llama3_tokenizer, MultiHeadAttention
+
+
+def llama3_2_1b_full():
+    output_dir = "/tmp/torchtune/llama3_2_1B/full"
+    batch_size = 4
+
+    return {
+        "output_dir": output_dir,
+        # PATTERN 1: Simple Component Instantiation
+        "tokenizer": {
+            "cls": llama3_tokenizer,
+            "kwargs": {
+                "path": "/tmp/Llama-3.2-1B-Instruct/original/tokenizer.model",
+            },
+        },
+        # PATTERN 2: Component with Nested Instantiation
+        "model": {
+            "cls": llama3_2_1b,
+            "kwargs": {
+                "attn_config": {
+                    "cls": MultiHeadAttention,
+                    "kwargs": {
+                        "num_heads": 32,
+                    },
+                }
+            },
+        },
+        # PATTERN 3: Component Needing Runtime Args (Partial)
+        "optimizer": {
+            "cls": torch.optim.AdamW,
+            "kwargs": {
+                "lr": 2e-5,
+            },
+        },
+        # PATTERN 4: Non-Instantiated Config Block (Plain Data)
+        "data_args": {
+            "batch_size": batch_size,
+            "shuffle": True,
+        },
+        # PATTERN 5: Plain Top-Level Hyperparameters
+        "epochs": 1,
+        "gradient_accumulation_steps": 8,
+    }
+
+
+if __name__ == "__main__":
+    # =========================================================================
+    # Scenario 1: Basic Instantiation
+    # =========================================================================
+    cfg = llama3_2_1b_full()
+
+    # PATTERN 1: Simple Component Instantiation
+    tokenizer = cfg["tokenizer"]["cls"](**cfg["tokenizer"]["kwargs"])
+
+    # PATTERN 2: Component with Nested Instantiation
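+    # With plain dicts there is no build() helper: nested components are
+    # instantiated by hand, innermost first, then passed to the outer class.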
+    attn_config = cfg["model"]["kwargs"]["attn_config"]["cls"](
+        **cfg["model"]["kwargs"]["attn_config"]["kwargs"]
+    )
+    model = cfg["model"]["cls"](attn_config=attn_config)
+
+    # PATTERN 3: Component Needing Runtime Args (Partial)
+    optimizer = cfg["optimizer"]["cls"](
+        model.parameters(), **cfg["optimizer"]["kwargs"]
+    )
+
+    # =========================================================================
+    # Scenario 2: Override Config Values
+    # =========================================================================
+    cfg2 = llama3_2_1b_full()
+
+    # PATTERN 1: Simple Component Instantiation
+    cfg2["tokenizer"]["kwargs"]["path"] = "/new/tokenizer"
+
+    # PATTERN 2: Component with Nested Instantiation
+    cfg2["model"]["kwargs"]["attn_config"]["kwargs"]["num_heads"] = 64
+
+    # PATTERN 3: Component Needing Runtime Args (Partial)
+    cfg2["optimizer"]["kwargs"]["lr"] = 1e-4
+
+    model2 = cfg2["model"]["cls"](
+        attn_config=cfg2["model"]["kwargs"]["attn_config"]["cls"](
+            **cfg2["model"]["kwargs"]["attn_config"]["kwargs"]
+        )
+    )
+    optimizer2 = cfg2["optimizer"]["cls"](
+        model2.parameters(), **cfg2["optimizer"]["kwargs"]
+    )
+
+    # =========================================================================
+    # Scenario 3: Config Composition
+    # =========================================================================
+    def llama3_2_1b_large_lr():
+        """Variant with larger learning rate."""
+        base = llama3_2_1b_full()
+        base["optimizer"]["kwargs"]["lr"] = 1e-3
+        base["model"]["kwargs"]["attn_config"]["kwargs"]["num_heads"] = 64
+        return base
+
+    cfg_variant = llama3_2_1b_large_lr()
+    attn_config_variant = cfg_variant["model"]["kwargs"]["attn_config"]["cls"](
+        **cfg_variant["model"]["kwargs"]["attn_config"]["kwargs"]
+    )
+    model_variant = cfg_variant["model"]["cls"](attn_config=attn_config_variant)
+    optimizer_variant = cfg_variant["optimizer"]["cls"](
+        model_variant.parameters(), **cfg_variant["optimizer"]["kwargs"]
+    )
diff --git a/brainstorming/configs/config_fiddle.py b/brainstorming/configs/config_fiddle.py
new file mode 100644
index 000000000..8ffe26b16
--- /dev/null
+++ b/brainstorming/configs/config_fiddle.py
@@ -0,0 +1,122 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""
+Config using the Fiddle approach (Google's lazy-instantiation library).
+
+Fiddle provides dict-style config objects with lazy instantiation.
+
+Pros:
+- Easy to override before building
+- Clear separation: config definition vs. instantiation
+
+Cons:
+- External dependency (pip install fiddle-config)
+- Learning curve
+- Less common outside Google
+"""
+
+import fiddle as fdl
+import torch.optim
+
+from mock import llama3_2_1b, llama3_tokenizer, MultiHeadAttention
+
+
+# ======================================================================
+# Config Factory Function
+# ======================================================================
+
+
+def llama3_2_1b_full():
+    output_dir = "/tmp/torchtune/llama3_2_1B/full"
+    batch_size = 4
+
+    # Start with an empty dict and build it up step by step
+    cfg = {}
+    cfg["output_dir"] = output_dir
+
+    # PATTERN 1: Simple Component Instantiation
+    cfg["tokenizer"] = fdl.Config(
+        llama3_tokenizer,
+        path="/tmp/Llama-3.2-1B-Instruct/original/tokenizer.model",
+    )
+
+    # PATTERN 2: Component with Nested Instantiation
+    cfg["model"] = fdl.Config(
+        llama3_2_1b,
+        attn_config=fdl.Config(MultiHeadAttention, num_heads=32),
+    )
+
+    # PATTERN 3: Component Needing Runtime Args (Partial)
+    cfg["optimizer"] = fdl.Partial(
+        torch.optim.AdamW,
+        lr=2e-5,
+    )
+
+    # PATTERN 4: Non-Instantiated Config Block (Plain Data)
+    cfg["data_args"] = {
+        "batch_size": batch_size,
+        "shuffle": True,
+    }
+
+    # PATTERN 5: Plain Top-Level Hyperparameters
+    cfg["epochs"] = 1
+    cfg["gradient_accumulation_steps"] = 8
+
+    return cfg
+
+
+if __name__ == "__main__":
+
+    # =========================================================================
+    # Scenario 1: Basic Instantiation
+    # =========================================================================
+    cfg = llama3_2_1b_full()
+
+    # PATTERN 1: Simple Component Instantiation
+    tokenizer = fdl.build(cfg["tokenizer"])
+
+    # PATTERN 2: Component with Nested Instantiation
+    model = fdl.build(cfg["model"])
+
+    # PATTERN 3: Component Needing Runtime Args (Partial)
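+    # Building a fdl.Partial yields a partial-like callable rather than an
+    # instance; the remaining runtime args (here params) are supplied at call time.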
"""Variant with larger learning rate.""" + base = llama3_2_1b_full() + base["optimizer"].lr = 1e-3 + base["model"].attn_config.num_heads = 64 + return base + + cfg_variant = llama3_2_1b_large_lr() + model_variant = fdl.build(cfg_variant["model"]) + optimizer_partial_variant = fdl.build(cfg_variant["optimizer"]) + optimizer_variant = optimizer_partial_variant(params=model_variant.parameters()) diff --git a/brainstorming/configs/config_hydra.py b/brainstorming/configs/config_hydra.py new file mode 100644 index 000000000..6da15dce1 --- /dev/null +++ b/brainstorming/configs/config_hydra.py @@ -0,0 +1,83 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +""" +Config using Hydra (YAML-based lazy instantiation). + +Pros: +- YAML syntax (human-readable) +- Native composition of yamls +- Lazy instantiation via hydra.utils.instantiate +- Command-line override for free (--optimizer.lr=1e-4) + +Cons: +- External dependency (pip install hydra-core) +- yaml is not .py +""" + +import os + +from hydra import compose, initialize_config_dir +from hydra.utils import instantiate + + +def load_config(): + """Load baseline.yaml using Hydra.""" + config_dir = os.path.abspath(os.path.dirname(__file__)) + + with initialize_config_dir(config_dir=config_dir, version_base=None): + cfg = compose(config_name="baseline") + + return cfg + + +if __name__ == "__main__": + # ========================================================================= + # Scenario 1: Basic Instantiation + # ========================================================================= + cfg = load_config() + + # PATTERN 1: Simple Component Instantiation + tokenizer = instantiate(cfg.tokenizer) + + # PATTERN 2: Component with Nested Instantiation + model = instantiate(cfg.model) + + # PATTERN 3: Component Needing Runtime Args (Partial) + optimizer_partial = instantiate(cfg.optimizer) + optimizer = optimizer_partial(params=model.parameters()) + + # ========================================================================= + # Scenario 2: Override Config Values + # ========================================================================= + cfg2 = load_config() + + # PATTERN 1: Simple Component Instantiation + cfg2.tokenizer.path = "/new/path" + tokenizer2 = instantiate(cfg2.tokenizer) + + # PATTERN 2: Component with Nested Instantiation + cfg2.model.attn_config.num_heads = 64 + model2 = instantiate(cfg2.model) + + # PATTERN 3: Component Needing Runtime Args (Partial) + cfg2.optimizer.lr = 1e-4 + optimizer_partial2 = instantiate(cfg2.optimizer) + optimizer2 = optimizer_partial2(params=model2.parameters()) + + # ========================================================================= + # Scenario 3: Config Composition (Base + Variant) + # ========================================================================= + # Load variant config that uses defaults to inherit from baseline + config_dir = os.path.abspath(os.path.dirname(__file__)) + with initialize_config_dir(config_dir=config_dir, version_base=None): + cfg_variant = compose(config_name="variant/baseline_different_bsz") + + # Verify the variant has inherited from baseline and overridden batch_size + assert cfg_variant.data_args.batch_size == 32 + + # Can instantiate components from the variant config + model_variant = instantiate(cfg_variant.model) diff --git a/brainstorming/configs/config_partial.py 
+import os
+
+from hydra import compose, initialize_config_dir
+from hydra.utils import instantiate
+
+
+def load_config():
+    """Load baseline.yaml using Hydra."""
+    config_dir = os.path.abspath(os.path.dirname(__file__))
+
+    with initialize_config_dir(config_dir=config_dir, version_base=None):
+        cfg = compose(config_name="baseline")
+
+    return cfg
+
+
+if __name__ == "__main__":
+    # =========================================================================
+    # Scenario 1: Basic Instantiation
+    # =========================================================================
+    cfg = load_config()
+
+    # PATTERN 1: Simple Component Instantiation
+    tokenizer = instantiate(cfg.tokenizer)
+
+    # PATTERN 2: Component with Nested Instantiation
+    model = instantiate(cfg.model)
+
+    # PATTERN 3: Component Needing Runtime Args (Partial)
+    optimizer_partial = instantiate(cfg.optimizer)
+    optimizer = optimizer_partial(params=model.parameters())
+
+    # =========================================================================
+    # Scenario 2: Override Config Values
+    # =========================================================================
+    cfg2 = load_config()
+
+    # PATTERN 1: Simple Component Instantiation
+    cfg2.tokenizer.path = "/new/path"
+    tokenizer2 = instantiate(cfg2.tokenizer)
+
+    # PATTERN 2: Component with Nested Instantiation
+    cfg2.model.attn_config.num_heads = 64
+    model2 = instantiate(cfg2.model)
+
+    # PATTERN 3: Component Needing Runtime Args (Partial)
+    cfg2.optimizer.lr = 1e-4
+    optimizer_partial2 = instantiate(cfg2.optimizer)
+    optimizer2 = optimizer_partial2(params=model2.parameters())
+
+    # =========================================================================
+    # Scenario 3: Config Composition (Base + Variant)
+    # =========================================================================
+    # Load a variant config that uses the defaults list to inherit from baseline
+    config_dir = os.path.abspath(os.path.dirname(__file__))
+    with initialize_config_dir(config_dir=config_dir, version_base=None):
+        cfg_variant = compose(config_name="variant/baseline_different_bsz")
+
+    # Verify the variant has inherited from baseline and overridden batch_size
+    assert cfg_variant.data_args.batch_size == 32
+
+    # Components can be instantiated from the variant config as usual
+    model_variant = instantiate(cfg_variant.model)
diff --git a/brainstorming/configs/config_partial.py b/brainstorming/configs/config_partial.py
new file mode 100644
index 000000000..699af7c1d
--- /dev/null
+++ b/brainstorming/configs/config_partial.py
@@ -0,0 +1,110 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""
+Config using the functools.partial approach (stdlib lazy instantiation).
+
+Pros:
+- Stdlib only (no dependencies)
+- Lazy instantiation
+
+Cons:
+- Hard to tell whether an object is a partial or an already-built instance
+- No validation
+"""
+
+from dataclasses import dataclass
+from functools import partial
+
+import torch.optim
+
+from mock import llama3_2_1b, llama3_tokenizer, MultiHeadAttention
+
+
+@dataclass
+class DataArgs:
+    """Plain dataclass for non-instantiated config block (PATTERN 4)."""
+
+    batch_size: int = 4
+    shuffle: bool = True
+
+
+def llama3_2_1b_full():
+    output_dir = "/tmp/torchtune/llama3_2_1B/full"
+
+    return {
+        "output_dir": output_dir,
+        # PATTERN 1: Simple Component Instantiation
+        "tokenizer": partial(
+            llama3_tokenizer,
+            path="/tmp/Llama-3.2-1B-Instruct/original/tokenizer.model",
+        ),
+        # PATTERN 2: Component with Nested Instantiation
+        "model": partial(
+            llama3_2_1b,
+            attn_config=partial(MultiHeadAttention, num_heads=32),
+        ),
+        # PATTERN 3: Component Needing Runtime Args (Partial)
+        "optimizer": partial(
+            torch.optim.AdamW,
+            lr=2e-5,
+        ),
+        # PATTERN 4: Non-Instantiated Config Block (Plain Data)
+        "data_args": DataArgs(
+            batch_size=4,
+            shuffle=True,
+        ),
+        # PATTERN 5: Plain Top-Level Hyperparameters
+        "epochs": 1,
+        "gradient_accumulation_steps": 8,
+    }
+
+
+if __name__ == "__main__":
+    # =========================================================================
+    # Scenario 1: Basic Instantiation
+    # =========================================================================
+    cfg = llama3_2_1b_full()
+
+    # PATTERN 1: Simple Component Instantiation
+    tokenizer = cfg["tokenizer"]()
+
+    # PATTERN 2: Component with Nested Instantiation
+    model = cfg["model"]()
+
+    # PATTERN 3: Component Needing Runtime Args (Partial)
+    optimizer = cfg["optimizer"](params=model.parameters())
+
+    # =========================================================================
+    # Scenario 2: Override Config Values
+    # =========================================================================
+    cfg2 = llama3_2_1b_full()
+
+    # PATTERN 1: Simple Component Instantiation
+    tokenizer2 = cfg2["tokenizer"](path="/new/path")
+
+    # PATTERN 2: Component with Nested Instantiation
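+    # functools.partial objects expose .func, .args, and .keywords; mutating
+    # .keywords below edits the stored config in place before the model is built.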
+    inner_partial = cfg2["model"].keywords["attn_config"]
+    inner_partial.keywords["num_heads"] = 64
+    model2 = cfg2["model"]()
+
+    # PATTERN 3: Component Needing Runtime Args (Partial)
+    optimizer2 = cfg2["optimizer"](params=model2.parameters(), lr=1e-4)
+
+    # =========================================================================
+    # Scenario 3: Config Composition
+    # =========================================================================
+    def llama3_2_1b_large_lr():
+        """Variant with larger learning rate and more attention heads."""
+        base = llama3_2_1b_full()
+        base["optimizer"].keywords["lr"] = 1e-3
+        base["model"].keywords["attn_config"].keywords["num_heads"] = 64
+        return base
+
+    cfg_variant = llama3_2_1b_large_lr()
+    model_variant = cfg_variant["model"]()
+    optimizer_variant = cfg_variant["optimizer"](params=model_variant.parameters())
diff --git a/brainstorming/configs/mock.py b/brainstorming/configs/mock.py
new file mode 100644
index 000000000..11f65735f
--- /dev/null
+++ b/brainstorming/configs/mock.py
@@ -0,0 +1,79 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""
+Mock implementations for demonstration purposes.
+
+In a real project, these would be actual imports from:
+- torchtune.models.llama3 import llama3_tokenizer
+- torchtune.models.llama3_2 import llama3_2_1b
+- torchtune.modules.attention import MultiHeadAttention
+- torchtune.training import FullModelHFCheckpointer
+- etc.
+"""
+
+import torch
+import torch.nn as nn
+import torch.optim
+
+
+# =============================================================================
+# Mock Functions and Classes (Standard)
+# =============================================================================
+
+
+llama3_tokenizer = lambda path, **kwargs: type("Tokenizer", (), {})()
+
+
+def llama3_2_1b(attn_config=None):
+    """Mock Llama model."""
+
+    class LlamaModel(nn.Module):
+        def __init__(self):
+            super().__init__()
+            if isinstance(attn_config, nn.Module):
+                self.attn = attn_config
+            else:
+                # attn_config is a partial/config factory; call it to build the module
+                self.attn = attn_config()
+            self.dummy = nn.Parameter(torch.zeros(10, 10))
+
+        def parameters(self, recurse=True):
+            return super().parameters(recurse)
+
+    return LlamaModel()
+
+
+alpaca_cleaned_dataset = lambda tokenizer, packed=False, split="train": type(
+    "Dataset", (), {}
+)()
+
+
+class MultiHeadAttention(nn.Module):
+    """Mock MultiHeadAttention module."""
+
+    def __init__(self, num_heads=32, **kwargs):
+        super().__init__()
+        self.num_heads = num_heads
+
+    def forward(self, x):
+        return x
+
+
+class FullModelHFCheckpointer:
+    """Mock checkpoint loader."""
+
+    __slots__ = ("checkpoint_dir",)
+
+    def __init__(self, checkpoint_dir, **kwargs):
+        self.checkpoint_dir = checkpoint_dir
+
+
+LinearCrossEntropyLoss = type("LinearCrossEntropyLoss", (), {})
+
+DiskLogger = type("DiskLogger", (), {})
+
+setup_torch_profiler = lambda enabled, output_dir: None
diff --git a/brainstorming/configs/mock_with_config.py b/brainstorming/configs/mock_with_config.py
new file mode 100644
index 000000000..86ff8160c
--- /dev/null
+++ b/brainstorming/configs/mock_with_config.py
@@ -0,0 +1,91 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""
+Mock implementations with inner Config classes for config_dataclasses.py.
+
+These show the pattern where each class has an inner Config class.
+In a real project, you would modify your own classes to add these inner Config classes.
+"""
+
+from dataclasses import dataclass
+from typing import Any, Type
+
+import torch
+import torch.nn as nn
+
+
+# =============================================================================
+# Mocks with Inner Config Classes
+# =============================================================================
+
+
+class TokenizerWithConfig:
+    """Custom tokenizer with inner Config class."""
+
+    @dataclass
+    class Config:
+        path: str
+
+        def build(self) -> "TokenizerWithConfig":
+            return TokenizerWithConfig(self)
+
+    def __init__(self, config: Config):
+        self.config = config
+        self.path = config.path
+
+
+class MultiHeadAttentionWithConfig(nn.Module):
+    """Attention module with inner Config class."""
+
+    @dataclass
+    class Config:
+        num_heads: int
+
+        def build(self) -> "MultiHeadAttentionWithConfig":
+            return MultiHeadAttentionWithConfig(self)
+
+    def __init__(self, config: Config):
+        super().__init__()
+        self.config = config
+        self.num_heads = config.num_heads
+
+
+class LlamaModelWithConfig(nn.Module):
+    """Model with inner Config class that contains a nested config."""
+
+    @dataclass
+    class Config:
+        attn_config: MultiHeadAttentionWithConfig.Config
+
+        def build(self) -> "LlamaModelWithConfig":
+            return LlamaModelWithConfig(self)
+
+    def __init__(self, config: Config):
+        super().__init__()
+        self.config = config
+        self.attn = config.attn_config.build()
+
+    def parameters(self):
+        """Mock parameters for demo."""
+        return iter([torch.zeros(10, 10)])
+
+
+# =============================================================================
+# Generic Config Wrappers for External Libraries
+# =============================================================================
+
+
+@dataclass
+class ComponentConfig:
+    """Generic wrapper for any component class."""
+
+    component_cls: Type
+    kwargs: dict[str, Any]
+
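+    # e.g. ComponentConfig(component_cls=torch.optim.AdamW, kwargs={"lr": 2e-5}).build(model.parameters())
+    # merges the stored kwargs with any runtime args before calling component_cls.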
+""" + +from dataclasses import dataclass +from typing import Any, Type + +import torch +import torch.nn as nn + + +# ============================================================================= +# Mocks with Inner Config Classes +# ============================================================================= + + +class TokenizerWithConfig: + """Custom tokenizer with inner Config class.""" + + @dataclass + class Config: + path: str + + def build(self) -> "TokenizerWithConfig": + return TokenizerWithConfig(self) + + def __init__(self, config: Config): + self.config = config + self.path = config.path + + +class MultiHeadAttentionWithConfig(nn.Module): + """Attention module with inner Config class.""" + + @dataclass + class Config: + num_heads: int + + def build(self) -> "MultiHeadAttentionWithConfig": + return MultiHeadAttentionWithConfig(self) + + def __init__(self, config: Config): + super().__init__() + self.config = config + self.num_heads = config.num_heads + + +class LlamaModelWithConfig(nn.Module): + """Model with inner Config class that contains nested config.""" + + @dataclass + class Config: + attn_config: MultiHeadAttentionWithConfig.Config + + def build(self) -> "LlamaModelWithConfig": + return LlamaModelWithConfig(self) + + def __init__(self, config: Config): + super().__init__() + self.config = config + self.attn = config.attn_config.build() + + def parameters(self): + """Mock parameters for demo.""" + return iter([torch.zeros(10, 10)]) + + +# ============================================================================= +# Generic Config Wrappers for External Libraries +# ============================================================================= + + +@dataclass +class ComponentConfig: + """Generic wrapper for any component class.""" + + component_cls: Type + kwargs: dict[str, Any] + + def build(self, *args, **runtime_kwargs): + merged_kwargs = {**self.kwargs, **runtime_kwargs} + return self.component_cls(*args, **merged_kwargs) diff --git a/brainstorming/configs/variant/baseline_different_bsz.yaml b/brainstorming/configs/variant/baseline_different_bsz.yaml new file mode 100644 index 000000000..e09126dde --- /dev/null +++ b/brainstorming/configs/variant/baseline_different_bsz.yaml @@ -0,0 +1,12 @@ +# @package _global_ + +# Variant config: inherits from baseline with different batch size +# This demonstrates Hydra's config composition via defaults list + +defaults: + - /baseline + - _self_ + +# Override batch size (applied after baseline due to _self_ position) +data_args: + batch_size: 32