|
"""Shared command-line argument definitions.

The same arguments are needed by both run_pretraining.py and
sagemaker_pretraining.py, so they are defined once in this file. This also
makes the training scripts much shorter.
"""
| 4 | + |
import argparse
import os
| 7 | + |
| 8 | + |
def populate_pretraining_parser(parser: argparse.ArgumentParser) -> None:
    """Register the pretraining command-line arguments on ``parser``.

    The parser is modified in place; nothing is returned.

    Switch-like options (``--skip_xla``, ``--eager``, ``--skip_sop``,
    ``--skip_mlm``, ``--pre_layer_norm``, ``--fast_squad``, ``--dummy_eval``)
    accept only the literal string ``"true"`` and default to ``None``.

    Args:
        parser: The argument parser to add the options to.
    """
    parser.add_argument("--model_dir", help="Unused, but passed by SageMaker")

    # Model and optimization hyperparameters.
    parser.add_argument("--model_type", default="albert", choices=["albert", "bert"])
    parser.add_argument("--model_size", default="base", choices=["base", "large"])
    parser.add_argument("--batch_size", type=int, default=32, help="per GPU")
    parser.add_argument("--gradient_accumulation_steps", type=int, default=2)
    parser.add_argument("--max_seq_length", type=int, default=512, choices=[128, 512])
    parser.add_argument("--warmup_steps", type=int, default=3125)
    parser.add_argument("--total_steps", type=int, default=125000)
    parser.add_argument("--learning_rate", type=float, default=0.00176)
    parser.add_argument("--end_learning_rate", type=float, default=3e-5)
    parser.add_argument("--learning_rate_decay_power", type=float, default=1.0)
    parser.add_argument("--hidden_dropout_prob", type=float, default=0.0)
    parser.add_argument("--max_grad_norm", type=float, default=1.0)
    parser.add_argument("--optimizer", default="lamb", choices=["lamb", "adam"])

    # Logging and checkpoint loading.
    parser.add_argument("--name", default="", help="Additional info to append to metadata")
    parser.add_argument("--log_frequency", type=int, default=1000)
    parser.add_argument(
        "--load_from",
        default="scratch",
        choices=["scratch", "checkpoint", "huggingface"],
    )
    parser.add_argument("--checkpoint_path", default=None)
    parser.add_argument(
        "--fsx_prefix",
        default="/fsx",
        choices=["/fsx", "/opt/ml/input/data/training"],
        help="Change to /opt/ml/input/data/training on SageMaker",
    )

    # SageMaker does not work with 'store_const' args, since it parses into a dictionary.
    # We will treat any value not equal to None as True, and use --skip_xla=true
    parser.add_argument(
        "--skip_xla",
        choices=["true"],
        help="For debugging. Faster startup time, slower runtime, more GPU vRAM.",
    )
    parser.add_argument(
        "--eager",
        choices=["true"],
        help="For debugging. Faster launch, slower runtime, more GPU vRAM.",
    )
    parser.add_argument(
        "--skip_sop",
        choices=["true"],
        help="Only use MLM loss, and exclude the SOP loss.",
    )
    parser.add_argument(
        "--skip_mlm",
        choices=["true"],
        help="Only use SOP loss, and exclude the MLM loss.",
    )
    parser.add_argument(
        "--pre_layer_norm",
        choices=["true"],
        help=(
            "Place layer normalization before the attention & FFN, rather than "
            "after adding the residual connection. "
            "https://openreview.net/pdf?id=B1x8anVFPr"
        ),
    )
    parser.add_argument("--extra_squad_steps", type=str)
    parser.add_argument("--fast_squad", choices=["true"])
    parser.add_argument("--dummy_eval", choices=["true"])
    parser.add_argument("--seed", type=int, default=42)
| 63 | + |
| 64 | + |
def populate_squad_parser(parser: argparse.ArgumentParser) -> None:
    """Register the SQuAD fine-tuning command-line arguments on ``parser``.

    The parser is modified in place; nothing is returned. ``--load_from`` is
    the only required option. ``--skip_xla``, ``--eager`` and
    ``--pre_layer_norm`` accept only the literal string ``"true"`` and
    default to ``None``.

    Args:
        parser: The argument parser to add the options to.
    """
    # Model loading
    parser.add_argument("--model_type", default="albert", choices=["albert", "bert"])
    parser.add_argument("--model_size", default="base", choices=["base", "large"])
    parser.add_argument("--load_from", required=True)
    parser.add_argument("--load_step", type=int)
    parser.add_argument("--skip_xla", choices=["true"])
    parser.add_argument("--eager", choices=["true"])
    parser.add_argument(
        "--pre_layer_norm",
        choices=["true"],
        help="See https://github.com/huggingface/transformers/pull/3929",
    )
    parser.add_argument(
        "--fsx_prefix",
        default="/fsx",
        choices=["/fsx", "/opt/ml/input/data/training"],
        help="Change to /opt/ml/input/data/training on SageMaker",
    )

    # Hyperparameters from https://arxiv.org/pdf/1909.11942.pdf#page=17
    parser.add_argument("--batch_size", default=6, type=int)
    parser.add_argument("--total_steps", default=8144, type=int)
    parser.add_argument("--warmup_steps", default=814, type=int)
    parser.add_argument("--learning_rate", default=3e-5, type=float)
    parser.add_argument("--dataset", default="squadv2")
    parser.add_argument("--seed", type=int, default=42)

    # Logging information
    parser.add_argument("--name", default="default")
    parser.add_argument("--validate_frequency", default=1000, type=int)
    parser.add_argument("--checkpoint_frequency", default=500, type=int)
    parser.add_argument("--model_dir", help="Unused, but passed by SageMaker")
| 96 | + |
| 97 | + |
def populate_sagemaker_parser(parser: argparse.ArgumentParser) -> None:
    """Register the SageMaker launch command-line arguments on ``parser``.

    The parser is modified in place; nothing is returned.

    ``--role``, ``--image_name``, ``--fsx_id``, ``--subnet_ids`` and
    ``--security_group_ids`` take their defaults from the ``SAGEMAKER_ROLE``,
    ``SAGEMAKER_IMAGE_NAME``, ``SAGEMAKER_FSX_ID``, ``SAGEMAKER_SUBNET_IDS``
    and ``SAGEMAKER_SECURITY_GROUP_IDS`` environment variables. When a
    variable is unset, the matching option becomes required on the command
    line, so a missing variable no longer raises ``KeyError`` while the
    parser is being built.

    Args:
        parser: The argument parser to add the options to.
    """

    def env_default(variable: str) -> dict:
        # Read the environment lazily: fall back to a required CLI option
        # rather than failing before the command line is even parsed.
        value = os.environ.get(variable)
        return {"default": value, "required": value is None}

    # SageMaker parameters
    parser.add_argument(
        "--source_dir",
        help="For example, /Users/myusername/Desktop/deep-learning-models/models/nlp/albert",
    )
    parser.add_argument("--entry_point", default="run_pretraining.py")
    parser.add_argument("--role", **env_default("SAGEMAKER_ROLE"))
    parser.add_argument("--image_name", **env_default("SAGEMAKER_IMAGE_NAME"))
    parser.add_argument("--fsx_id", **env_default("SAGEMAKER_FSX_ID"))
    parser.add_argument(
        "--subnet_ids",
        help="Comma-separated string",
        **env_default("SAGEMAKER_SUBNET_IDS"),
    )
    parser.add_argument(
        "--security_group_ids",
        help="Comma-separated string",
        **env_default("SAGEMAKER_SECURITY_GROUP_IDS"),
    )

    # Instance specs
    parser.add_argument(
        "--instance_type",
        type=str,
        default="ml.p3dn.24xlarge",
        choices=["ml.p3dn.24xlarge", "ml.p3.16xlarge", "ml.g4dn.12xlarge"],
    )
    parser.add_argument("--instance_count", type=int, default=1)
0 commit comments