1 parent c988cf2 commit 893aaa5
megatron/arguments.py
@@ -378,6 +378,10 @@ def validate_args(args, defaults={}):
     if args.sequence_parallel:
         args.async_tensor_model_parallel_allreduce = False
 
+    if args.use_flash_attn:
+        assert not args.reset_attention_mask, \
+            "Flash Attention doesn't support arbitrary attention masks. Please turn off reset-attention-mask"
+
     _print_args(args)
     return args
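For context, a minimal standalone sketch of how the added validation behaves. This is not the Megatron-LM code path itself; the check is reproduced in isolation, and the attribute names (use_flash_attn, reset_attention_mask) are taken from the diff above.

from argparse import Namespace

def check_flash_attn_mask(args):
    # Mirrors the check added in this commit: when Flash Attention is enabled,
    # the arbitrary masks produced by reset-attention-mask are rejected up front.
    if args.use_flash_attn:
        assert not args.reset_attention_mask, \
            "Flash Attention doesn't support arbitrary attention masks. " \
            "Please turn off reset-attention-mask"

# Passes: Flash Attention enabled, mask resetting disabled.
check_flash_attn_mask(Namespace(use_flash_attn=True, reset_attention_mask=False))

# Raises AssertionError: both options enabled at once.
check_flash_attn_mask(Namespace(use_flash_attn=True, reset_attention_mask=True))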