2 parents c988cf2 + 893aaa5 commit beaf2f2
megatron/arguments.py
@@ -378,6 +378,10 @@ def validate_args(args, defaults={}):
     if args.sequence_parallel:
         args.async_tensor_model_parallel_allreduce = False
 
+    if args.use_flash_attn:
+        assert not args.reset_attention_mask, \
+            "Flash Attention doesn't support arbitrary attention masks. Please turn off reset-attention-mask"
+
     _print_args(args)
     return args
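For context, below is a minimal standalone sketch of the behaviour this check enforces: when use_flash_attn is set, reset_attention_mask must be off, otherwise argument validation fails with an AssertionError. The helper name check_flash_attn_mask and the use of argparse.Namespace as a stand-in for Megatron's parsed arguments are illustrative assumptions, not part of the commit.

    # Minimal sketch of the added validation, assuming a Namespace stands in
    # for the args object that Megatron's validate_args receives.
    from argparse import Namespace

    def check_flash_attn_mask(args):
        # Mirrors the new check: Flash Attention cannot be combined with
        # reset-attention-mask, which would require arbitrary attention masks.
        if args.use_flash_attn:
            assert not args.reset_attention_mask, \
                "Flash Attention doesn't support arbitrary attention masks. " \
                "Please turn off reset-attention-mask"

    # Passes: flash attention with the default (non-reset) attention mask.
    check_flash_attn_mask(Namespace(use_flash_attn=True, reset_attention_mask=False))

    # Raises AssertionError: both options enabled together.
    try:
        check_flash_attn_mask(Namespace(use_flash_attn=True, reset_attention_mask=True))
    except AssertionError as e:
        print(e)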