diff --git a/brainstorming/configs/baseline.yaml b/brainstorming/configs/baseline.yaml
new file mode 100644
index 000000000..632dbc500
--- /dev/null
+++ b/brainstorming/configs/baseline.yaml
@@ -0,0 +1,33 @@
+# baseline.yaml shows 5 key config patterns
+
+output_dir: /tmp/torchtune/llama3_2_1B/full
+
+# PATTERN 1: Simple Component Instantiation
+tokenizer:
+  _target_: mock.llama3_tokenizer
+  path: /tmp/Llama-3.2-1B-Instruct/original/tokenizer.model
+
+# PATTERN 2: Component with Nested Instantiation
+model:
+  _target_: mock.llama3_2_1b
+  # Nested component: attention config
+  attn_config:
+    _target_: mock.MultiHeadAttention
+    num_heads: 32
+
+# PATTERN 3: Component Needing Runtime Args (Partial)
+optimizer:
+  _target_: torch.optim.AdamW
+  lr: 2e-5
+  _partial_: true
+  # params: None  # will be passed at instantiation time (not known now)
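+  # Note: with _partial_: true, hydra.utils.instantiate returns a functools.partial
+  # that is later called with the runtime-only arguments (see config_hydra.py).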
+
+# PATTERN 4: Non-Instantiated Config Block (Plain Data)
+data_args:
+  batch_size: 4
+  shuffle: True
+
+# PATTERN 5: Plain Top-Level Hyperparameters
+# Training params
+epochs: 1
+gradient_accumulation_steps: 8
diff --git a/brainstorming/configs/config_dataclasses.py b/brainstorming/configs/config_dataclasses.py
new file mode 100644
index 000000000..369d77a2b
--- /dev/null
+++ b/brainstorming/configs/config_dataclasses.py
@@ -0,0 +1,131 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""
+Dataclass config with inner Config classes.
+
+Pros:
+- Type safety for instantiated configs
+
+Cons:
+- Requires modifying target classes (not feasible for external libraries; those need a generic wrapper such as ComponentConfig)
+- Boilerplate (every class needs Config + __init__)
+
+e.g.:
+
+```
+class TokenizerWithConfig:
+    @dataclass
+    class Config:
+        path: str
+
+        def build(self) -> "TokenizerWithConfig":
+            return TokenizerWithConfig(self)
+
+    def __init__(self, config: Config):
+        self.config = config
+        self.path = config.path
+```
+"""
+
+from dataclasses import dataclass
+
+import torch
+
+from mock_with_config import (
+    ComponentConfig,
+    LlamaModelWithConfig,
+    MultiHeadAttentionWithConfig,
+    TokenizerWithConfig,
+)
+
+
+@dataclass
+class DataArgs:
+    """Plain dataclass for non-instantiated config block (PATTERN 4)."""
+
+    batch_size: int = 4
+    shuffle: bool = True
+
+
+def llama3_2_1b_full():
+    output_dir = "/tmp/torchtune/llama3_2_1B/full"
+
+    return {
+        "output_dir": output_dir,
+        # PATTERN 1: Simple Component Instantiation
+        "tokenizer": TokenizerWithConfig.Config(
+            path="/tmp/Llama-3.2-1B-Instruct/original/tokenizer.model",
+        ),
+        # PATTERN 2: Component with Nested Instantiation
+        "model": LlamaModelWithConfig.Config(
+            attn_config=MultiHeadAttentionWithConfig.Config(
+                num_heads=32,
+            )
+        ),
+        # PATTERN 3: Component Needing Runtime Args (Partial)
+        "optimizer": ComponentConfig(
+            component_cls=torch.optim.AdamW,
+            kwargs={"lr": 2e-5},
+        ),
+        # PATTERN 4: Non-Instantiated Config Block (Plain Data)
+        "data_args": DataArgs(
+            batch_size=4,
+            shuffle=True,
+        ),
+        # PATTERN 5: Plain Top-Level Hyperparameters
+        "epochs": 1,
+        "gradient_accumulation_steps": 8,
+    }
+
+
+if __name__ == "__main__":
+    # =========================================================================
+    # Scenario 1: Basic Instantiation
+    # =========================================================================
+    cfg = llama3_2_1b_full()
+
+    # PATTERN 1: Simple Component Instantiation
+    tokenizer = cfg["tokenizer"].build()
+
+    # PATTERN 2: Component with Nested Instantiation
+    model = cfg["model"].build()
+
+    # PATTERN 3: Component Needing Runtime Args (Partial)
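+    # ComponentConfig.build() forwards runtime positional args (here the model
+    # params) to the target class, merged with the kwargs stored in the config.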
+    optimizer = cfg["optimizer"].build(model.parameters())
+
+    # =========================================================================
+    # Scenario 2: Override Config Values
+    # =========================================================================
+    cfg2 = llama3_2_1b_full()
+
+    # PATTERN 1: Simple Component Instantiation
+    cfg2["tokenizer"].path = "/new/path"
+
+    # PATTERN 2: Component with Nested Instantiation
+    cfg2["model"].attn_config.num_heads = 64
+
+    # PATTERN 3: Component Needing Runtime Args (Partial)
+    cfg2["optimizer"].kwargs["lr"] = 1e-4
+
+    model2 = cfg2["model"].build()
+    optimizer2 = cfg2["optimizer"].build(model2.parameters())
+
+    # =========================================================================
+    # Scenario 3: Config Composition
+    # =========================================================================
+    def llama3_2_1b_large_lr():
+        """Variant with larger learning rate and different model config."""
+        base = llama3_2_1b_full()
+        # Overrides
+        base["optimizer"].kwargs["lr"] = 1e-3
+        base["model"].attn_config.num_heads = 64
+        return base
+
+    cfg_variant = llama3_2_1b_large_lr()
+    model_variant = cfg_variant["model"].build()
+    optimizer_variant = cfg_variant["optimizer"].build(model_variant.parameters())
+    assert optimizer_variant.param_groups[0]["lr"] == 1e-3
diff --git a/brainstorming/configs/config_dicts.py b/brainstorming/configs/config_dicts.py
new file mode 100644
index 000000000..80cb987c0
--- /dev/null
+++ b/brainstorming/configs/config_dicts.py
@@ -0,0 +1,130 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""
+Config using Plain Python Dicts.
+
+Pros:
+- Extremely simple
+- No dependencies
+- Easy to understand
+- Flexible
+
+Cons:
+- No type hints (cfg["batch_szie"] typo won't be caught)
+- No validation (cfg["batch_size"] = "invalid" won't error)
+- Very loose, users can pass anything
+"""
+
+import torch.optim
+
+from mock import llama3_2_1b, llama3_tokenizer, MultiHeadAttention
+
+
+def llama3_2_1b_full():
+    output_dir = "/tmp/torchtune/llama3_2_1B/full"
+    batch_size = 4
+
+    return {
+        "output_dir": output_dir,
+        # PATTERN 1: Simple Component Instantiation
+        "tokenizer": {
+            "cls": llama3_tokenizer,
+            "kwargs": {
+                "path": "/tmp/Llama-3.2-1B-Instruct/original/tokenizer.model",
+            },
+        },
+        # PATTERN 2: Component with Nested Instantiation
+        "model": {
+            "cls": llama3_2_1b,
+            "kwargs": {
+                "attn_config": {
+                    "cls": MultiHeadAttention,
+                    "kwargs": {
+                        "num_heads": 32,
+                    },
+                }
+            },
+        },
+        # PATTERN 3: Component Needing Runtime Args (Partial)
+        "optimizer": {
+            "cls": torch.optim.AdamW,
+            "kwargs": {
+                "lr": 2e-5,
+            },
+        },
+        # PATTERN 4: Non-Instantiated Config Block (Plain Data)
+        "data_args": {
+            "batch_size": batch_size,
+            "shuffle": True,
+        },
+        # PATTERN 5: Plain Top-Level Hyperparameters
+        "epochs": 1,
+        "gradient_accumulation_steps": 8,
+    }
+
+
+if __name__ == "__main__":
+    # =========================================================================
+    # Scenario 1: Basic Instantiation
+    # =========================================================================
+    cfg = llama3_2_1b_full()
+
+    # PATTERN 1: Simple Component Instantiation
+    tokenizer = cfg["tokenizer"]["cls"](**cfg["tokenizer"]["kwargs"])
+
+    # PATTERN 2: Component with Nested Instantiation
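+    # With plain dicts there is no build() helper: nested components are
+    # instantiated by hand, innermost first, then passed to the outer class.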
+    attn_config = cfg["model"]["kwargs"]["attn_config"]["cls"](
+        **cfg["model"]["kwargs"]["attn_config"]["kwargs"]
+    )
+    model = cfg["model"]["cls"](attn_config=attn_config)
+
+    # PATTERN 3: Component Needing Runtime Args (Partial)
+    optimizer = cfg["optimizer"]["cls"](
+        model.parameters(), **cfg["optimizer"]["kwargs"]
+    )
+
+    # =========================================================================
+    # Scenario 2: Override Config Values
+    # =========================================================================
+    cfg2 = llama3_2_1b_full()
+
+    # PATTERN 1: Simple Component Instantiation
+    cfg2["tokenizer"]["kwargs"]["path"] = "/new/tokenizer"
+
+    # PATTERN 2: Component with Nested Instantiation
+    cfg2["model"]["kwargs"]["attn_config"]["kwargs"]["num_heads"] = 64
+
+    # PATTERN 3: Component Needing Runtime Args (Partial)
+    cfg2["optimizer"]["kwargs"]["lr"] = 1e-4
+
+    model2 = cfg2["model"]["cls"](
+        attn_config=cfg2["model"]["kwargs"]["attn_config"]["cls"](
+            **cfg2["model"]["kwargs"]["attn_config"]["kwargs"]
+        )
+    )
+    optimizer2 = cfg2["optimizer"]["cls"](
+        model2.parameters(), **cfg2["optimizer"]["kwargs"]
+    )
+
+    # =========================================================================
+    # Scenario 3: Config Composition
+    # =========================================================================
+    def llama3_2_1b_large_lr():
+        """Variant with larger learning rate."""
+        base = llama3_2_1b_full()
+        base["optimizer"]["kwargs"]["lr"] = 1e-3
+        base["model"]["kwargs"]["attn_config"]["kwargs"]["num_heads"] = 64
+        return base
+
+    cfg_variant = llama3_2_1b_large_lr()
+    attn_config_variant = cfg_variant["model"]["kwargs"]["attn_config"]["cls"](
+        **cfg_variant["model"]["kwargs"]["attn_config"]["kwargs"]
+    )
+    model_variant = cfg_variant["model"]["cls"](attn_config=attn_config_variant)
+    optimizer_variant = cfg_variant["optimizer"]["cls"](
+        model_variant.parameters(), **cfg_variant["optimizer"]["kwargs"]
+    )
diff --git a/brainstorming/configs/config_fiddle.py b/brainstorming/configs/config_fiddle.py
new file mode 100644
index 000000000..8ffe26b16
--- /dev/null
+++ b/brainstorming/configs/config_fiddle.py
@@ -0,0 +1,122 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""
+Config using the Fiddle approach (Google's lazy-instantiation library).
+
+Fiddle provides dict-style config objects with lazy instantiation.
+
+Pros:
+- Easy to override before building
+- Clear separation: config definition vs. instantiation
+
+Cons:
+- External dependency (pip install fiddle-config)
+- Learning curve
+- Less common outside Google
+"""
+
+import fiddle as fdl
+import torch.optim
+
+from mock import llama3_2_1b, llama3_tokenizer, MultiHeadAttention
+
+
+# ======================================================================
+# Config Factory Function
+# ======================================================================
+
+
+def llama3_2_1b_full():
+    output_dir = "/tmp/torchtune/llama3_2_1B/full"
+    batch_size = 4
+
+    # Start with an empty dict and build it up step by step
+    cfg = {}
+    cfg["output_dir"] = output_dir
+
+    # PATTERN 1: Simple Component Instantiation
+    cfg["tokenizer"] = fdl.Config(
+        llama3_tokenizer,
+        path="/tmp/Llama-3.2-1B-Instruct/original/tokenizer.model",
+    )
+
+    # PATTERN 2: Component with Nested Instantiation
+    cfg["model"] = fdl.Config(
+        llama3_2_1b,
+        attn_config=fdl.Config(MultiHeadAttention, num_heads=32),
+    )
+
+    # PATTERN 3: Component Needing Runtime Args (Partial)
+    cfg["optimizer"] = fdl.Partial(
+        torch.optim.AdamW,
+        lr=2e-5,
+    )
+
+    # PATTERN 4: Non-Instantiated Config Block (Plain Data)
+    cfg["data_args"] = {
+        "batch_size": batch_size,
+        "shuffle": True,
+    }
+
+    # PATTERN 5: Plain Top-Level Hyperparameters
+    cfg["epochs"] = 1
+    cfg["gradient_accumulation_steps"] = 8
+
+    return cfg
+
+
+if __name__ == "__main__":
+
+    # =========================================================================
+    # Scenario 1: Basic Instantiation
+    # =========================================================================
+    cfg = llama3_2_1b_full()
+
+    # PATTERN 1: Simple Component Instantiation
+    tokenizer = fdl.build(cfg["tokenizer"])
+
+    # PATTERN 2: Component with Nested Instantiation
+    model = fdl.build(cfg["model"])
+
+    # PATTERN 3: Component Needing Runtime Args (Partial)
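+    # Building a fdl.Partial yields a partial-like callable rather than an
+    # instance; the remaining runtime args (here params) are supplied at call time.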
"""Variant with larger learning rate.""" + base = llama3_2_1b_full() + base["optimizer"].lr = 1e-3 + base["model"].attn_config.num_heads = 64 + return base + + cfg_variant = llama3_2_1b_large_lr() + model_variant = fdl.build(cfg_variant["model"]) + optimizer_partial_variant = fdl.build(cfg_variant["optimizer"]) + optimizer_variant = optimizer_partial_variant(params=model_variant.parameters()) diff --git a/brainstorming/configs/config_hydra.py b/brainstorming/configs/config_hydra.py new file mode 100644 index 000000000..6da15dce1 --- /dev/null +++ b/brainstorming/configs/config_hydra.py @@ -0,0 +1,83 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +""" +Config using Hydra (YAML-based lazy instantiation). + +Pros: +- YAML syntax (human-readable) +- Native composition of yamls +- Lazy instantiation via hydra.utils.instantiate +- Command-line override for free (--optimizer.lr=1e-4) + +Cons: +- External dependency (pip install hydra-core) +- yaml is not .py +""" + +import os + +from hydra import compose, initialize_config_dir +from hydra.utils import instantiate + + +def load_config(): + """Load baseline.yaml using Hydra.""" + config_dir = os.path.abspath(os.path.dirname(__file__)) + + with initialize_config_dir(config_dir=config_dir, version_base=None): + cfg = compose(config_name="baseline") + + return cfg + + +if __name__ == "__main__": + # ========================================================================= + # Scenario 1: Basic Instantiation + # ========================================================================= + cfg = load_config() + + # PATTERN 1: Simple Component Instantiation + tokenizer = instantiate(cfg.tokenizer) + + # PATTERN 2: Component with Nested Instantiation + model = instantiate(cfg.model) + + # PATTERN 3: Component Needing Runtime Args (Partial) + optimizer_partial = instantiate(cfg.optimizer) + optimizer = optimizer_partial(params=model.parameters()) + + # ========================================================================= + # Scenario 2: Override Config Values + # ========================================================================= + cfg2 = load_config() + + # PATTERN 1: Simple Component Instantiation + cfg2.tokenizer.path = "/new/path" + tokenizer2 = instantiate(cfg2.tokenizer) + + # PATTERN 2: Component with Nested Instantiation + cfg2.model.attn_config.num_heads = 64 + model2 = instantiate(cfg2.model) + + # PATTERN 3: Component Needing Runtime Args (Partial) + cfg2.optimizer.lr = 1e-4 + optimizer_partial2 = instantiate(cfg2.optimizer) + optimizer2 = optimizer_partial2(params=model2.parameters()) + + # ========================================================================= + # Scenario 3: Config Composition (Base + Variant) + # ========================================================================= + # Load variant config that uses defaults to inherit from baseline + config_dir = os.path.abspath(os.path.dirname(__file__)) + with initialize_config_dir(config_dir=config_dir, version_base=None): + cfg_variant = compose(config_name="variant/baseline_different_bsz") + + # Verify the variant has inherited from baseline and overridden batch_size + assert cfg_variant.data_args.batch_size == 32 + + # Can instantiate components from the variant config + model_variant = instantiate(cfg_variant.model) diff --git a/brainstorming/configs/config_partial.py 
+import os
+
+from hydra import compose, initialize_config_dir
+from hydra.utils import instantiate
+
+
+def load_config():
+    """Load baseline.yaml using Hydra."""
+    config_dir = os.path.abspath(os.path.dirname(__file__))
+
+    with initialize_config_dir(config_dir=config_dir, version_base=None):
+        cfg = compose(config_name="baseline")
+
+    return cfg
+
+
+if __name__ == "__main__":
+    # =========================================================================
+    # Scenario 1: Basic Instantiation
+    # =========================================================================
+    cfg = load_config()
+
+    # PATTERN 1: Simple Component Instantiation
+    tokenizer = instantiate(cfg.tokenizer)
+
+    # PATTERN 2: Component with Nested Instantiation
+    model = instantiate(cfg.model)
+
+    # PATTERN 3: Component Needing Runtime Args (Partial)
+    optimizer_partial = instantiate(cfg.optimizer)
+    optimizer = optimizer_partial(params=model.parameters())
+
+    # =========================================================================
+    # Scenario 2: Override Config Values
+    # =========================================================================
+    cfg2 = load_config()
+
+    # PATTERN 1: Simple Component Instantiation
+    cfg2.tokenizer.path = "/new/path"
+    tokenizer2 = instantiate(cfg2.tokenizer)
+
+    # PATTERN 2: Component with Nested Instantiation
+    cfg2.model.attn_config.num_heads = 64
+    model2 = instantiate(cfg2.model)
+
+    # PATTERN 3: Component Needing Runtime Args (Partial)
+    cfg2.optimizer.lr = 1e-4
+    optimizer_partial2 = instantiate(cfg2.optimizer)
+    optimizer2 = optimizer_partial2(params=model2.parameters())
+
+    # =========================================================================
+    # Scenario 3: Config Composition (Base + Variant)
+    # =========================================================================
+    # Load a variant config that uses the defaults list to inherit from baseline
+    config_dir = os.path.abspath(os.path.dirname(__file__))
+    with initialize_config_dir(config_dir=config_dir, version_base=None):
+        cfg_variant = compose(config_name="variant/baseline_different_bsz")
+
+    # Verify the variant has inherited from baseline and overridden batch_size
+    assert cfg_variant.data_args.batch_size == 32
+
+    # Components can be instantiated from the variant config as usual
+    model_variant = instantiate(cfg_variant.model)
diff --git a/brainstorming/configs/config_partial.py b/brainstorming/configs/config_partial.py
new file mode 100644
index 000000000..699af7c1d
--- /dev/null
+++ b/brainstorming/configs/config_partial.py
@@ -0,0 +1,110 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""
+Config using the functools.partial approach (stdlib lazy instantiation).
+
+Pros:
+- Stdlib only (no dependencies)
+- Lazy instantiation
+
+Cons:
+- Hard to tell whether an object is a partial or an already-built instance
+- No validation
+"""
+
+from dataclasses import dataclass
+from functools import partial
+
+import torch.optim
+
+from mock import llama3_2_1b, llama3_tokenizer, MultiHeadAttention
+
+
+@dataclass
+class DataArgs:
+    """Plain dataclass for non-instantiated config block (PATTERN 4)."""
+
+    batch_size: int = 4
+    shuffle: bool = True
+
+
+def llama3_2_1b_full():
+    output_dir = "/tmp/torchtune/llama3_2_1B/full"
+
+    return {
+        "output_dir": output_dir,
+        # PATTERN 1: Simple Component Instantiation
+        "tokenizer": partial(
+            llama3_tokenizer,
+            path="/tmp/Llama-3.2-1B-Instruct/original/tokenizer.model",
+        ),
+        # PATTERN 2: Component with Nested Instantiation
+        "model": partial(
+            llama3_2_1b,
+            attn_config=partial(MultiHeadAttention, num_heads=32),
+        ),
+        # PATTERN 3: Component Needing Runtime Args (Partial)
+        "optimizer": partial(
+            torch.optim.AdamW,
+            lr=2e-5,
+        ),
+        # PATTERN 4: Non-Instantiated Config Block (Plain Data)
+        "data_args": DataArgs(
+            batch_size=4,
+            shuffle=True,
+        ),
+        # PATTERN 5: Plain Top-Level Hyperparameters
+        "epochs": 1,
+        "gradient_accumulation_steps": 8,
+    }
+
+
+if __name__ == "__main__":
+    # =========================================================================
+    # Scenario 1: Basic Instantiation
+    # =========================================================================
+    cfg = llama3_2_1b_full()
+
+    # PATTERN 1: Simple Component Instantiation
+    tokenizer = cfg["tokenizer"]()
+
+    # PATTERN 2: Component with Nested Instantiation
+    model = cfg["model"]()
+
+    # PATTERN 3: Component Needing Runtime Args (Partial)
+    optimizer = cfg["optimizer"](params=model.parameters())
+
+    # =========================================================================
+    # Scenario 2: Override Config Values
+    # =========================================================================
+    cfg2 = llama3_2_1b_full()
+
+    # PATTERN 1: Simple Component Instantiation
+    tokenizer2 = cfg2["tokenizer"](path="/new/path")
+
+    # PATTERN 2: Component with Nested Instantiation
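+    # functools.partial objects expose .func, .args, and .keywords; mutating
+    # .keywords below edits the stored config in place before the model is built.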
+    inner_partial = cfg2["model"].keywords["attn_config"]
+    inner_partial.keywords["num_heads"] = 64
+    model2 = cfg2["model"]()
+
+    # PATTERN 3: Component Needing Runtime Args (Partial)
+    optimizer2 = cfg2["optimizer"](params=model2.parameters(), lr=1e-4)
+
+    # =========================================================================
+    # Scenario 3: Config Composition
+    # =========================================================================
+    def llama3_2_1b_large_lr():
+        """Variant with larger learning rate and more attention heads."""
+        base = llama3_2_1b_full()
+        base["optimizer"].keywords["lr"] = 1e-3
+        base["model"].keywords["attn_config"].keywords["num_heads"] = 64
+        return base
+
+    cfg_variant = llama3_2_1b_large_lr()
+    model_variant = cfg_variant["model"]()
+    optimizer_variant = cfg_variant["optimizer"](params=model_variant.parameters())
diff --git a/brainstorming/configs/mock.py b/brainstorming/configs/mock.py
new file mode 100644
index 000000000..11f65735f
--- /dev/null
+++ b/brainstorming/configs/mock.py
@@ -0,0 +1,79 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""
+Mock implementations for demonstration purposes.
+
+In a real project, these would be actual imports from:
+- torchtune.models.llama3 import llama3_tokenizer
+- torchtune.models.llama3_2 import llama3_2_1b
+- torchtune.modules.attention import MultiHeadAttention
+- torchtune.training import FullModelHFCheckpointer
+- etc.
+"""
+
+import torch
+import torch.nn as nn
+import torch.optim
+
+
+# =============================================================================
+# Mock Functions and Classes (Standard)
+# =============================================================================
+
+
+llama3_tokenizer = lambda path, **kwargs: type("Tokenizer", (), {})()
+
+
+def llama3_2_1b(attn_config=None):
+    """Mock Llama model."""
+
+    class LlamaModel(nn.Module):
+        def __init__(self):
+            super().__init__()
+            if isinstance(attn_config, nn.Module):
+                self.attn = attn_config
+            else:
+                # attn_config is a partial/config factory; call it to build the module
+                self.attn = attn_config()
+            self.dummy = nn.Parameter(torch.zeros(10, 10))
+
+        def parameters(self, recurse=True):
+            return super().parameters(recurse)
+
+    return LlamaModel()
+
+
+alpaca_cleaned_dataset = lambda tokenizer, packed=False, split="train": type(
+    "Dataset", (), {}
+)()
+
+
+class MultiHeadAttention(nn.Module):
+    """Mock MultiHeadAttention module."""
+
+    def __init__(self, num_heads=32, **kwargs):
+        super().__init__()
+        self.num_heads = num_heads
+
+    def forward(self, x):
+        return x
+
+
+class FullModelHFCheckpointer:
+    """Mock checkpoint loader."""
+
+    __slots__ = ("checkpoint_dir",)
+
+    def __init__(self, checkpoint_dir, **kwargs):
+        self.checkpoint_dir = checkpoint_dir
+
+
+LinearCrossEntropyLoss = type("LinearCrossEntropyLoss", (), {})
+
+DiskLogger = type("DiskLogger", (), {})
+
+setup_torch_profiler = lambda enabled, output_dir: None
diff --git a/brainstorming/configs/mock_with_config.py b/brainstorming/configs/mock_with_config.py
new file mode 100644
index 000000000..86ff8160c
--- /dev/null
+++ b/brainstorming/configs/mock_with_config.py
@@ -0,0 +1,91 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""
+Mock implementations with inner Config classes for config_dataclasses.py.
+
+These show the pattern where each class has an inner Config class.
+In a real project, you would modify your own classes to add these inner Config classes.
+"""
+
+from dataclasses import dataclass
+from typing import Any, Type
+
+import torch
+import torch.nn as nn
+
+
+# =============================================================================
+# Mocks with Inner Config Classes
+# =============================================================================
+
+
+class TokenizerWithConfig:
+    """Custom tokenizer with inner Config class."""
+
+    @dataclass
+    class Config:
+        path: str
+
+        def build(self) -> "TokenizerWithConfig":
+            return TokenizerWithConfig(self)
+
+    def __init__(self, config: Config):
+        self.config = config
+        self.path = config.path
+
+
+class MultiHeadAttentionWithConfig(nn.Module):
+    """Attention module with inner Config class."""
+
+    @dataclass
+    class Config:
+        num_heads: int
+
+        def build(self) -> "MultiHeadAttentionWithConfig":
+            return MultiHeadAttentionWithConfig(self)
+
+    def __init__(self, config: Config):
+        super().__init__()
+        self.config = config
+        self.num_heads = config.num_heads
+
+
+class LlamaModelWithConfig(nn.Module):
+    """Model with inner Config class that contains a nested config."""
+
+    @dataclass
+    class Config:
+        attn_config: MultiHeadAttentionWithConfig.Config
+
+        def build(self) -> "LlamaModelWithConfig":
+            return LlamaModelWithConfig(self)
+
+    def __init__(self, config: Config):
+        super().__init__()
+        self.config = config
+        self.attn = config.attn_config.build()
+
+    def parameters(self):
+        """Mock parameters for demo."""
+        return iter([torch.zeros(10, 10)])
+
+
+# =============================================================================
+# Generic Config Wrappers for External Libraries
+# =============================================================================
+
+
+@dataclass
+class ComponentConfig:
+    """Generic wrapper for any component class."""
+
+    component_cls: Type
+    kwargs: dict[str, Any]
+
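+    # e.g. ComponentConfig(component_cls=torch.optim.AdamW, kwargs={"lr": 2e-5}).build(model.parameters())
+    # merges the stored kwargs with any runtime args before calling component_cls.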
+""" + +from dataclasses import dataclass +from typing import Any, Type + +import torch +import torch.nn as nn + + +# ============================================================================= +# Mocks with Inner Config Classes +# ============================================================================= + + +class TokenizerWithConfig: + """Custom tokenizer with inner Config class.""" + + @dataclass + class Config: + path: str + + def build(self) -> "TokenizerWithConfig": + return TokenizerWithConfig(self) + + def __init__(self, config: Config): + self.config = config + self.path = config.path + + +class MultiHeadAttentionWithConfig(nn.Module): + """Attention module with inner Config class.""" + + @dataclass + class Config: + num_heads: int + + def build(self) -> "MultiHeadAttentionWithConfig": + return MultiHeadAttentionWithConfig(self) + + def __init__(self, config: Config): + super().__init__() + self.config = config + self.num_heads = config.num_heads + + +class LlamaModelWithConfig(nn.Module): + """Model with inner Config class that contains nested config.""" + + @dataclass + class Config: + attn_config: MultiHeadAttentionWithConfig.Config + + def build(self) -> "LlamaModelWithConfig": + return LlamaModelWithConfig(self) + + def __init__(self, config: Config): + super().__init__() + self.config = config + self.attn = config.attn_config.build() + + def parameters(self): + """Mock parameters for demo.""" + return iter([torch.zeros(10, 10)]) + + +# ============================================================================= +# Generic Config Wrappers for External Libraries +# ============================================================================= + + +@dataclass +class ComponentConfig: + """Generic wrapper for any component class.""" + + component_cls: Type + kwargs: dict[str, Any] + + def build(self, *args, **runtime_kwargs): + merged_kwargs = {**self.kwargs, **runtime_kwargs} + return self.component_cls(*args, **merged_kwargs) diff --git a/brainstorming/configs/variant/baseline_different_bsz.yaml b/brainstorming/configs/variant/baseline_different_bsz.yaml new file mode 100644 index 000000000..e09126dde --- /dev/null +++ b/brainstorming/configs/variant/baseline_different_bsz.yaml @@ -0,0 +1,12 @@ +# @package _global_ + +# Variant config: inherits from baseline with different batch size +# This demonstrates Hydra's config composition via defaults list + +defaults: + - /baseline + - _self_ + +# Override batch size (applied after baseline due to _self_ position) +data_args: + batch_size: 32