# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

"""
Config using Plain Python Dicts.

Pros:
- Extremely simple
- No dependencies
- Easy to understand
- Flexible

Cons:
- No type hints (a cfg["batch_szie"] typo won't be caught)
- No validation (cfg["batch_size"] = "invalid" won't error)
- Very loose, users can pass anything
"""

import torch.optim

from mock import llama3_2_1b, llama3_tokenizer, MultiHeadAttention


def llama3_2_1b_full():
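    """Plain-dict config for a full Llama 3.2 1B fine-tuning run."""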
    output_dir = "/tmp/torchtune/llama3_2_1B/full"
    batch_size = 4

    return {
        "output_dir": output_dir,
        # PATTERN 1: Simple Component Instantiation
        "tokenizer": {
            "cls": llama3_tokenizer,
            "kwargs": {
                "path": "/tmp/Llama-3.2-1B-Instruct/original/tokenizer.model",
            },
        },
        # PATTERN 2: Component with Nested Instantiation
        "model": {
            "cls": llama3_2_1b,
            "kwargs": {
                "attn_config": {
                    "cls": MultiHeadAttention,
                    "kwargs": {
                        "num_heads": 32,
                    },
                }
            },
        },
        # PATTERN 3: Component Needing Runtime Args (Partial)
        "optimizer": {
            "cls": torch.optim.AdamW,
            "kwargs": {
                "lr": 2e-5,
            },
        },
        # PATTERN 4: Non-Instantiated Config Block (Plain Data)
        "data_args": {
            "batch_size": batch_size,
            "shuffle": True,
        },
        # PATTERN 5: Plain Top-Level Hyperparameters
        "epochs": 1,
        "gradient_accumulation_steps": 8,
    }

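# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the recipe config above): the repeated
# {"cls": ..., "kwargs": {...}} convention can be collapsed into one small
# helper. The name `instantiate` and this spec layout are assumptions of the
# sketch, not an API provided by torchtune or torch.
# ---------------------------------------------------------------------------
def instantiate(spec, *runtime_args, **runtime_overrides):
    """Build an object from a {"cls": ..., "kwargs": {...}} spec.

    Nested specs inside "kwargs" are built first (PATTERN 2), and positional
    runtime arguments such as model parameters are forwarded to the
    constructor (PATTERN 3).
    """
    kwargs = {}
    for key, value in spec.get("kwargs", {}).items():
        # A nested dict carrying a "cls" entry is treated as a sub-component
        # and instantiated recursively; everything else passes through as-is.
        if isinstance(value, dict) and "cls" in value:
            kwargs[key] = instantiate(value)
        else:
            kwargs[key] = value
    kwargs.update(runtime_overrides)
    return spec["cls"](*runtime_args, **kwargs)
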

if __name__ == "__main__":
    # =========================================================================
    # Scenario 1: Basic Instantiation
    # =========================================================================
    cfg = llama3_2_1b_full()

    # PATTERN 1: Simple Component Instantiation
    tokenizer = cfg["tokenizer"]["cls"](**cfg["tokenizer"]["kwargs"])

    # PATTERN 2: Component with Nested Instantiation
    attn_config = cfg["model"]["kwargs"]["attn_config"]["cls"](
        **cfg["model"]["kwargs"]["attn_config"]["kwargs"]
    )
    model = cfg["model"]["cls"](attn_config=attn_config)

    # PATTERN 3: Component Needing Runtime Args (Partial)
    optimizer = cfg["optimizer"]["cls"](
        model.parameters(), **cfg["optimizer"]["kwargs"]
    )
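
    # Illustrative only: the same three components built with the instantiate()
    # sketch defined above; equivalent to the manual pattern walkthrough.
    tokenizer_alt = instantiate(cfg["tokenizer"])
    model_alt = instantiate(cfg["model"])
    optimizer_alt = instantiate(cfg["optimizer"], model_alt.parameters())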

    # =========================================================================
    # Scenario 2: Override Config Values
    # =========================================================================
    cfg2 = llama3_2_1b_full()

    # PATTERN 1: Simple Component Instantiation
    cfg2["tokenizer"]["kwargs"]["path"] = "/new/tokenizer"

    # PATTERN 2: Component with Nested Instantiation
    cfg2["model"]["kwargs"]["attn_config"]["kwargs"]["num_heads"] = 64

    # PATTERN 3: Component Needing Runtime Args (Partial)
    cfg2["optimizer"]["kwargs"]["lr"] = 1e-4

    model2 = cfg2["model"]["cls"](
        attn_config=cfg2["model"]["kwargs"]["attn_config"]["cls"](
            **cfg2["model"]["kwargs"]["attn_config"]["kwargs"]
        )
    )
    optimizer2 = cfg2["optimizer"]["cls"](
        model2.parameters(), **cfg2["optimizer"]["kwargs"]
    )
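
    # Downside noted in the module docstring: overrides are not validated, so
    # mistakes pass silently. Illustrative only:
    #   cfg2["data_args"]["batch_szie"] = 8          # typo creates a new key, no error
    #   cfg2["optimizer"]["kwargs"]["lr"] = "1e-4"   # wrong type; fails only when used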

    # =========================================================================
    # Scenario 3: Config Composition
    # =========================================================================
    def llama3_2_1b_large_lr():
        """Variant with larger learning rate."""
        base = llama3_2_1b_full()
        base["optimizer"]["kwargs"]["lr"] = 1e-3
        base["model"]["kwargs"]["attn_config"]["kwargs"]["num_heads"] = 64
        return base

    cfg_variant = llama3_2_1b_large_lr()
    attn_config_variant = cfg_variant["model"]["kwargs"]["attn_config"]["cls"](
        **cfg_variant["model"]["kwargs"]["attn_config"]["kwargs"]
    )
    model_variant = cfg_variant["model"]["cls"](attn_config=attn_config_variant)
    optimizer_variant = cfg_variant["optimizer"]["cls"](
        model_variant.parameters(), **cfg_variant["optimizer"]["kwargs"]
    )
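
    # Note: composition works here because llama3_2_1b_full() builds a fresh
    # dict on every call. If variants were derived from a single shared dict
    # instead, a deep copy would be needed before mutating nested blocks.
    # Illustrative only:
    import copy

    shared_base = llama3_2_1b_full()
    deep_variant = copy.deepcopy(shared_base)  # deep copy so nested edits don't leak
    deep_variant["optimizer"]["kwargs"]["lr"] = 1e-3
    assert shared_base["optimizer"]["kwargs"]["lr"] == 2e-5  # base is untouched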