@@ -2,7 +2,7 @@ use std::collections::hash_map::Entry;
2
2
use std:: marker:: PhantomData ;
3
3
use std:: ops:: Range ;
4
4
5
- use rustc_abi:: { BackendRepr , FieldIdx , FieldsShape , Size , VariantIdx } ;
5
+ use rustc_abi:: { BackendRepr , FieldIdx , FieldsShape , ScalableElt , Size , VariantIdx } ;
6
6
use rustc_data_structures:: fx:: FxHashMap ;
7
7
use rustc_index:: IndexVec ;
8
8
use rustc_middle:: middle:: codegen_fn_attrs:: CodegenFnAttrFlags ;
@@ -361,6 +361,49 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
361
361
return ;
362
362
}
363
363
364
+ // Don't spill `<vscale x N x i1>` for `N != 16`:
365
+ //
366
+ // SVE predicates are only one bit for each byte in an SVE vector (which makes
367
+ // sense, the predicate only needs to keep track of whether a lane is
368
+ // enabled/disabled). i.e. a `<vscale x 16 x i8>` vector has a `<vscale x 16 x i1>`
369
+ // predicate type. `<vscale x 16 x i1>` corresponds to two bytes of storage,
370
+ // multiplied by the `vscale`, with one bit for each of the sixteen lanes.
371
+ //
372
+ // For a vector with fewer elements, such as `svint32_t`/`<vscale x 4 x i32>`,
373
+ // while only a `<vscale x 4 x i1>` predicate type would be strictly necessary,
374
+ // relevant intrinsics still take a `svbool_t`/`<vscale x 16 x i1>` - this is
375
+ // because a `<vscale x 4 x i1>` is only half of a byte (for `vscale=1`), and with
376
+ // memory being byte-addressable, it's unclear how to store that.
377
+ //
378
+ // Due to this, LLVM ultimately decided not to support stores of `<vscale x N x i1>`
379
+ // for `N != 16`. With `vscale=1` and `N` fewer than sixteen, partial bytes would
380
+ // need to be stored (except for `N=8`, but that also isn't supported). `N` can
381
+ // never be greater than sixteen as that ends up larger than the 128-bit increment
382
+ // size.
383
+ //
384
+ // Internally, with an intrinsic operating on a `svint32_t`/`<vscale x 4 x i32>`
385
+ // (for example), the intrinsic takes the `svbool_t`/`<vscale x 16 x i1>` predicate
386
+ // and casts it to a `svbool4_t`/`<vscale x 4 x i1>`. Therefore, it's important that
387
+ // the `<vscale x 4 x i1>` never spills because that'll cause errors during
388
+ // instruction selection. Spilling to the stack to create debuginfo for these
389
+ // intermediate values must be avoided and won't degrade the debugging experience
390
+ // anyway.
391
+ if operand. layout . ty . is_scalable_vector ( )
392
+ && bx. sess ( ) . target . arch == "aarch64"
393
+ && let ty:: Adt ( adt, args) = & operand. layout . ty . kind ( )
394
+ && let Some ( marker_type_field) =
395
+ adt. non_enum_variant ( ) . fields . get ( FieldIdx :: from_u32 ( 0 ) )
396
+ {
397
+ let marker_type = marker_type_field. ty ( bx. tcx ( ) , args) ;
398
+ // i.e. `<vscale x N x i1>` when `N != 16`
399
+ if let ty:: Slice ( element_ty) = marker_type. kind ( )
400
+ && element_ty. is_bool ( )
401
+ && adt. repr ( ) . scalable != Some ( ScalableElt :: ElementCount ( 16 ) )
402
+ {
403
+ return ;
404
+ }
405
+ }
406
+
364
407
Self :: spill_operand_to_stack ( * operand, name, bx)
365
408
}
366
409
0 commit comments