Skip to content

Commit 2b131b8

Browse files
committed
Allow vector-scalar (vs) registers in ppc inline assembly
Where supported, VSX is a 64x128b register set which encompasses both the floating point and vector registers. In the type tests, xvsqrtdp is used as it is the only two-argument vsx opcode supported by all targets on llvm. If you need to copy a vsx register, the preferred way is "xxlor xt, xa, xa".
1 parent 15283f6 commit 2b131b8

File tree

13 files changed

+3018
-183
lines changed

13 files changed

+3018
-183
lines changed

compiler/rustc_codegen_gcc/src/asm.rs

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -698,6 +698,7 @@ fn reg_class_to_gcc(reg_class: InlineAsmRegClass) -> &'static str {
698698
InlineAsmRegClass::PowerPC(PowerPCInlineAsmRegClass::reg_nonzero) => "b",
699699
InlineAsmRegClass::PowerPC(PowerPCInlineAsmRegClass::freg) => "f",
700700
InlineAsmRegClass::PowerPC(PowerPCInlineAsmRegClass::vreg) => "v",
701+
InlineAsmRegClass::PowerPC(PowerPCInlineAsmRegClass::vsreg) => "wa",
701702
InlineAsmRegClass::PowerPC(
702703
PowerPCInlineAsmRegClass::cr
703704
| PowerPCInlineAsmRegClass::ctr
@@ -778,9 +779,9 @@ fn dummy_output_type<'gcc, 'tcx>(cx: &CodegenCx<'gcc, 'tcx>, reg: InlineAsmRegCl
778779
InlineAsmRegClass::PowerPC(PowerPCInlineAsmRegClass::reg) => cx.type_i32(),
779780
InlineAsmRegClass::PowerPC(PowerPCInlineAsmRegClass::reg_nonzero) => cx.type_i32(),
780781
InlineAsmRegClass::PowerPC(PowerPCInlineAsmRegClass::freg) => cx.type_f64(),
781-
InlineAsmRegClass::PowerPC(PowerPCInlineAsmRegClass::vreg) => {
782-
cx.type_vector(cx.type_i32(), 4)
783-
}
782+
InlineAsmRegClass::PowerPC(
783+
PowerPCInlineAsmRegClass::vreg | PowerPCInlineAsmRegClass::vsreg,
784+
) => cx.type_vector(cx.type_i32(), 4),
784785
InlineAsmRegClass::PowerPC(
785786
PowerPCInlineAsmRegClass::cr
786787
| PowerPCInlineAsmRegClass::ctr
@@ -957,6 +958,13 @@ fn modifier_to_gcc(
957958
InlineAsmRegClass::LoongArch(_) => None,
958959
InlineAsmRegClass::Mips(_) => None,
959960
InlineAsmRegClass::Nvptx(_) => None,
961+
InlineAsmRegClass::PowerPC(PowerPCInlineAsmRegClass::vsreg) => {
962+
if modifier.is_none() {
963+
Some('x')
964+
} else {
965+
modifier
966+
}
967+
}
960968
InlineAsmRegClass::PowerPC(_) => None,
961969
InlineAsmRegClass::RiscV(RiscVInlineAsmRegClass::reg)
962970
| InlineAsmRegClass::RiscV(RiscVInlineAsmRegClass::freg) => None,

compiler/rustc_codegen_llvm/src/asm.rs

Lines changed: 32 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -662,6 +662,7 @@ fn reg_to_llvm(reg: InlineAsmRegOrRegClass, layout: Option<&TyAndLayout<'_>>) ->
662662
PowerPC(PowerPCInlineAsmRegClass::reg_nonzero) => "b",
663663
PowerPC(PowerPCInlineAsmRegClass::freg) => "f",
664664
PowerPC(PowerPCInlineAsmRegClass::vreg) => "v",
665+
PowerPC(PowerPCInlineAsmRegClass::vsreg) => "^wa",
665666
PowerPC(
666667
PowerPCInlineAsmRegClass::cr
667668
| PowerPCInlineAsmRegClass::ctr
@@ -752,6 +753,12 @@ fn modifier_to_llvm(
752753
LoongArch(_) => None,
753754
Mips(_) => None,
754755
Nvptx(_) => None,
756+
PowerPC(PowerPCInlineAsmRegClass::vsreg) => {
757+
// The documentation for the 'x' modifier is missing for llvm, and the gcc
758+
// documentation is simply "use this for any vsx argument". It is needed
759+
// to ensure the correct vsx register number is used.
760+
if modifier.is_none() { Some('x') } else { modifier }
761+
}
755762
PowerPC(_) => None,
756763
RiscV(RiscVInlineAsmRegClass::reg) | RiscV(RiscVInlineAsmRegClass::freg) => None,
757764
RiscV(RiscVInlineAsmRegClass::vreg) => unreachable!("clobber-only"),
@@ -835,6 +842,7 @@ fn dummy_output_type<'ll>(cx: &CodegenCx<'ll, '_>, reg: InlineAsmRegClass) -> &'
835842
PowerPC(PowerPCInlineAsmRegClass::reg_nonzero) => cx.type_i32(),
836843
PowerPC(PowerPCInlineAsmRegClass::freg) => cx.type_f64(),
837844
PowerPC(PowerPCInlineAsmRegClass::vreg) => cx.type_vector(cx.type_i32(), 4),
845+
PowerPC(PowerPCInlineAsmRegClass::vsreg) => cx.type_vector(cx.type_i32(), 4),
838846
PowerPC(
839847
PowerPCInlineAsmRegClass::cr
840848
| PowerPCInlineAsmRegClass::ctr
@@ -1065,19 +1073,21 @@ fn llvm_fixup_input<'ll, 'tcx>(
10651073
let value = bx.or(value, bx.const_u32(0xFFFF_0000));
10661074
bx.bitcast(value, bx.type_f32())
10671075
}
1068-
(PowerPC(PowerPCInlineAsmRegClass::vreg), BackendRepr::Scalar(s))
1069-
if s.primitive() == Primitive::Float(Float::F32) =>
1070-
{
1076+
(
1077+
PowerPC(PowerPCInlineAsmRegClass::vreg | PowerPCInlineAsmRegClass::vsreg),
1078+
BackendRepr::Scalar(s),
1079+
) if s.primitive() == Primitive::Float(Float::F32) => {
10711080
let value = bx.insert_element(
10721081
bx.const_undef(bx.type_vector(bx.type_f32(), 4)),
10731082
value,
10741083
bx.const_usize(0),
10751084
);
10761085
bx.bitcast(value, bx.type_vector(bx.type_f32(), 4))
10771086
}
1078-
(PowerPC(PowerPCInlineAsmRegClass::vreg), BackendRepr::Scalar(s))
1079-
if s.primitive() == Primitive::Float(Float::F64) =>
1080-
{
1087+
(
1088+
PowerPC(PowerPCInlineAsmRegClass::vreg | PowerPCInlineAsmRegClass::vsreg),
1089+
BackendRepr::Scalar(s),
1090+
) if s.primitive() == Primitive::Float(Float::F64) => {
10811091
let value = bx.insert_element(
10821092
bx.const_undef(bx.type_vector(bx.type_f64(), 2)),
10831093
value,
@@ -1228,15 +1238,17 @@ fn llvm_fixup_output<'ll, 'tcx>(
12281238
let value = bx.trunc(value, bx.type_i16());
12291239
bx.bitcast(value, bx.type_f16())
12301240
}
1231-
(PowerPC(PowerPCInlineAsmRegClass::vreg), BackendRepr::Scalar(s))
1232-
if s.primitive() == Primitive::Float(Float::F32) =>
1233-
{
1241+
(
1242+
PowerPC(PowerPCInlineAsmRegClass::vreg | PowerPCInlineAsmRegClass::vsreg),
1243+
BackendRepr::Scalar(s),
1244+
) if s.primitive() == Primitive::Float(Float::F32) => {
12341245
let value = bx.bitcast(value, bx.type_vector(bx.type_f32(), 4));
12351246
bx.extract_element(value, bx.const_usize(0))
12361247
}
1237-
(PowerPC(PowerPCInlineAsmRegClass::vreg), BackendRepr::Scalar(s))
1238-
if s.primitive() == Primitive::Float(Float::F64) =>
1239-
{
1248+
(
1249+
PowerPC(PowerPCInlineAsmRegClass::vreg | PowerPCInlineAsmRegClass::vsreg),
1250+
BackendRepr::Scalar(s),
1251+
) if s.primitive() == Primitive::Float(Float::F64) => {
12401252
let value = bx.bitcast(value, bx.type_vector(bx.type_f64(), 2));
12411253
bx.extract_element(value, bx.const_usize(0))
12421254
}
@@ -1370,16 +1382,14 @@ fn llvm_fixup_output_type<'ll, 'tcx>(
13701382
{
13711383
cx.type_f32()
13721384
}
1373-
(PowerPC(PowerPCInlineAsmRegClass::vreg), BackendRepr::Scalar(s))
1374-
if s.primitive() == Primitive::Float(Float::F32) =>
1375-
{
1376-
cx.type_vector(cx.type_f32(), 4)
1377-
}
1378-
(PowerPC(PowerPCInlineAsmRegClass::vreg), BackendRepr::Scalar(s))
1379-
if s.primitive() == Primitive::Float(Float::F64) =>
1380-
{
1381-
cx.type_vector(cx.type_f64(), 2)
1382-
}
1385+
(
1386+
PowerPC(PowerPCInlineAsmRegClass::vreg | PowerPCInlineAsmRegClass::vsreg),
1387+
BackendRepr::Scalar(s),
1388+
) if s.primitive() == Primitive::Float(Float::F32) => cx.type_vector(cx.type_f32(), 4),
1389+
(
1390+
PowerPC(PowerPCInlineAsmRegClass::vreg | PowerPCInlineAsmRegClass::vsreg),
1391+
BackendRepr::Scalar(s),
1392+
) if s.primitive() == Primitive::Float(Float::F64) => cx.type_vector(cx.type_f64(), 2),
13831393
_ => layout.llvm_type(cx),
13841394
}
13851395
}

compiler/rustc_span/src/symbol.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2399,6 +2399,7 @@ symbols! {
23992399
volatile_store,
24002400
vreg,
24012401
vreg_low16,
2402+
vsreg,
24022403
vsx,
24032404
vtable_align,
24042405
vtable_size,

compiler/rustc_target/src/asm/mod.rs

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1251,9 +1251,17 @@ impl InlineAsmClobberAbi {
12511251
r3, r4, r5, r6, r7,
12521252
r8, r9, r10, r11, r12,
12531253

1254-
// f0-f13
1254+
// f0-f13 and their vsx overlays.
12551255
f0, f1, f2, f3, f4, f5, f6, f7,
12561256
f8, f9, f10, f11, f12, f13,
1257+
vs0, vs1, vs2, vs3, vs4, vs5, vs6, vs7,
1258+
vs8, vs9, vs10, vs11, vs12, vs13,
1259+
1260+
// vs14-31, the fpr portion is saved, but the rest of the register is volatile.
1261+
// We can't express that here, so mark the entire vsx register as volatile.
1262+
vs14, vs15, vs16, vs17, vs18, vs19, vs20,
1263+
vs21, vs22, vs23, vs24, vs25, vs26, vs27,
1264+
vs28, vs29, vs30, vs31,
12571265

12581266
// v0-v19
12591267
v0, v1, v2, v3, v4, v5, v6, v7,

compiler/rustc_target/src/asm/powerpc.rs

Lines changed: 148 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ def_reg_class! {
1212
reg_nonzero,
1313
freg,
1414
vreg,
15+
vsreg,
1516
cr,
1617
ctr,
1718
lr,
@@ -58,6 +59,10 @@ impl PowerPCInlineAsmRegClass {
5859
altivec: VecI8(16), VecI16(8), VecI32(4), VecF32(4);
5960
vsx: F32, F64, VecI64(2), VecF64(2);
6061
},
62+
// VSX is a superset of altivec.
63+
Self::vsreg => types! {
64+
vsx: F32, F64, VecI8(16), VecI16(8), VecI32(4), VecI64(2), VecF32(4), VecF64(2);
65+
},
6166
Self::cr | Self::ctr | Self::lr | Self::xer => &[],
6267
}
6368
}
@@ -86,7 +91,7 @@ fn reserved_v20to31(
8691
) -> Result<(), &'static str> {
8792
if target.is_like_aix {
8893
match &*target.options.abi {
89-
"vec-default" => Err("v20-v31 are reserved on vec-default ABI"),
94+
"vec-default" => Err("v20-v31 (vs52-vs63) are reserved on vec-default ABI"),
9095
"vec-extabi" => Ok(()),
9196
_ => unreachable!("unrecognized AIX ABI"),
9297
}
@@ -188,6 +193,71 @@ def_regs! {
188193
v29: vreg = ["v29"] % reserved_v20to31,
189194
v30: vreg = ["v30"] % reserved_v20to31,
190195
v31: vreg = ["v31"] % reserved_v20to31,
196+
vs0: vsreg = ["vs0"],
197+
vs1: vsreg = ["vs1"],
198+
vs2: vsreg = ["vs2"],
199+
vs3: vsreg = ["vs3"],
200+
vs4: vsreg = ["vs4"],
201+
vs5: vsreg = ["vs5"],
202+
vs6: vsreg = ["vs6"],
203+
vs7: vsreg = ["vs7"],
204+
vs8: vsreg = ["vs8"],
205+
vs9: vsreg = ["vs9"],
206+
vs10: vsreg = ["vs10"],
207+
vs11: vsreg = ["vs11"],
208+
vs12: vsreg = ["vs12"],
209+
vs13: vsreg = ["vs13"],
210+
vs14: vsreg = ["vs14"],
211+
vs15: vsreg = ["vs15"],
212+
vs16: vsreg = ["vs16"],
213+
vs17: vsreg = ["vs17"],
214+
vs18: vsreg = ["vs18"],
215+
vs19: vsreg = ["vs19"],
216+
vs20: vsreg = ["vs20"],
217+
vs21: vsreg = ["vs21"],
218+
vs22: vsreg = ["vs22"],
219+
vs23: vsreg = ["vs23"],
220+
vs24: vsreg = ["vs24"],
221+
vs25: vsreg = ["vs25"],
222+
vs26: vsreg = ["vs26"],
223+
vs27: vsreg = ["vs27"],
224+
vs28: vsreg = ["vs28"],
225+
vs29: vsreg = ["vs29"],
226+
vs30: vsreg = ["vs30"],
227+
vs31: vsreg = ["vs31"],
228+
vs32: vsreg = ["vs32"],
229+
vs33: vsreg = ["vs33"],
230+
vs34: vsreg = ["vs34"],
231+
vs35: vsreg = ["vs35"],
232+
vs36: vsreg = ["vs36"],
233+
vs37: vsreg = ["vs37"],
234+
vs38: vsreg = ["vs38"],
235+
vs39: vsreg = ["vs39"],
236+
vs40: vsreg = ["vs40"],
237+
vs41: vsreg = ["vs41"],
238+
vs42: vsreg = ["vs42"],
239+
vs43: vsreg = ["vs43"],
240+
vs44: vsreg = ["vs44"],
241+
vs45: vsreg = ["vs45"],
242+
vs46: vsreg = ["vs46"],
243+
vs47: vsreg = ["vs47"],
244+
vs48: vsreg = ["vs48"],
245+
vs49: vsreg = ["vs49"],
246+
vs50: vsreg = ["vs50"],
247+
vs51: vsreg = ["vs51"],
248+
// vs52 - vs63 are aliases of v20-v31.
249+
vs52: vsreg = ["vs52"] % reserved_v20to31,
250+
vs53: vsreg = ["vs53"] % reserved_v20to31,
251+
vs54: vsreg = ["vs54"] % reserved_v20to31,
252+
vs55: vsreg = ["vs55"] % reserved_v20to31,
253+
vs56: vsreg = ["vs56"] % reserved_v20to31,
254+
vs57: vsreg = ["vs57"] % reserved_v20to31,
255+
vs58: vsreg = ["vs58"] % reserved_v20to31,
256+
vs59: vsreg = ["vs59"] % reserved_v20to31,
257+
vs60: vsreg = ["vs60"] % reserved_v20to31,
258+
vs61: vsreg = ["vs61"] % reserved_v20to31,
259+
vs62: vsreg = ["vs62"] % reserved_v20to31,
260+
vs63: vsreg = ["vs63"] % reserved_v20to31,
191261
cr: cr = ["cr"],
192262
cr0: cr = ["cr0"],
193263
cr1: cr = ["cr1"],
@@ -245,6 +315,15 @@ impl PowerPCInlineAsmReg {
245315
(v8, "8"), (v9, "9"), (v10, "10"), (v11, "11"), (v12, "12"), (v13, "13"), (v14, "14"), (v15, "15");
246316
(v16, "16"), (v17, "17"), (v18, "18"), (v19, "19"), (v20, "20"), (v21, "21"), (v22, "22"), (v23, "23");
247317
(v24, "24"), (v25, "25"), (v26, "26"), (v27, "27"), (v28, "28"), (v29, "29"), (v30, "30"), (v31, "31");
318+
(vs0, "0"), (vs1, "1"), (vs2, "2"), (vs3, "3"), (vs4, "4"), (vs5, "5"), (vs6, "6"), (vs7, "7"),
319+
(vs8, "8"), (vs9, "9"), (vs10, "10"), (vs11, "11"), (vs12, "12"), (vs13, "13"), (vs14, "14"),
320+
(vs15, "15"), (vs16, "16"), (vs17, "17"), (vs18, "18"), (vs19, "19"), (vs20, "20"), (vs21, "21"),
321+
(vs22, "22"), (vs23, "23"), (vs24, "24"), (vs25, "25"), (vs26, "26"), (vs27, "27"), (vs28, "28"),
322+
(vs29, "29"), (vs30, "30"), (vs31, "31"), (vs32, "32"), (vs33, "33"), (vs34, "34"), (vs35, "35"),
323+
(vs36, "36"), (vs37, "37"), (vs38, "38"), (vs39, "39"), (vs40, "40"), (vs41, "41"), (vs42, "42"),
324+
(vs43, "43"), (vs44, "44"), (vs45, "45"), (vs46, "46"), (vs47, "47"), (vs48, "48"), (vs49, "49"),
325+
(vs50, "50"), (vs51, "51"), (vs52, "52"), (vs53, "53"), (vs54, "54"), (vs55, "55"), (vs56, "56"),
326+
(vs57, "57"), (vs58, "58"), (vs59, "59"), (vs60, "60"), (vs61, "61"), (vs62, "62"), (vs63, "63"),
248327
(cr, "cr");
249328
(cr0, "0"), (cr1, "1"), (cr2, "2"), (cr3, "3"), (cr4, "4"), (cr5, "5"), (cr6, "6"), (cr7, "7");
250329
(ctr, "ctr");
@@ -276,8 +355,74 @@ impl PowerPCInlineAsmReg {
276355
};
277356
}
278357
reg_conflicts! {
279-
cr : cr0 cr1 cr2 cr3 cr4 cr5 cr6 cr7;
358+
cr : cr0 cr1 cr2 cr3 cr4 cr5 cr6 cr7,
359+
// f0-f31 overlap half of each of vs0-vs31.
360+
vs0 : f0,
361+
vs1 : f1,
362+
vs2 : f2,
363+
vs3 : f3,
364+
vs4 : f4,
365+
vs5 : f5,
366+
vs6 : f6,
367+
vs7 : f7,
368+
vs8 : f8,
369+
vs9 : f9,
370+
vs10 : f10,
371+
vs11 : f11,
372+
vs12 : f12,
373+
vs13 : f13,
374+
vs14 : f14,
375+
vs15 : f15,
376+
vs16 : f16,
377+
vs17 : f17,
378+
vs18 : f18,
379+
vs19 : f19,
380+
vs20 : f20,
381+
vs21 : f21,
382+
vs22 : f22,
383+
vs23 : f23,
384+
vs24 : f24,
385+
vs25 : f25,
386+
vs26 : f26,
387+
vs27 : f27,
388+
vs28 : f28,
389+
vs29 : f29,
390+
vs30 : f30,
391+
vs31 : f31,
392+
// vs32-vs63 are aliases of v0-v31
393+
vs32 : v0,
394+
vs33 : v1,
395+
vs34 : v2,
396+
vs35 : v3,
397+
vs36 : v4,
398+
vs37 : v5,
399+
vs38 : v6,
400+
vs39 : v7,
401+
vs40 : v8,
402+
vs41 : v9,
403+
vs42 : v10,
404+
vs43 : v11,
405+
vs44 : v12,
406+
vs45 : v13,
407+
vs46 : v14,
408+
vs47 : v15,
409+
vs48 : v16,
410+
vs49 : v17,
411+
vs50 : v18,
412+
vs51 : v19,
413+
vs52 : v20,
414+
vs53 : v21,
415+
vs54 : v22,
416+
vs55 : v23,
417+
vs56 : v24,
418+
vs57 : v25,
419+
vs58 : v26,
420+
vs59 : v27,
421+
vs60 : v28,
422+
vs61 : v29,
423+
vs62 : v30,
424+
vs63 : v31;
280425
}
281-
// f0-f31 (vsr0-vsr31) and v0-v31 (vsr32-vsr63) do not conflict.
426+
// For more detail, see ISA 3.1, Book I, Section 7.2.
282427
}
283428
}

src/doc/unstable-book/src/language-features/asm-experimental-arch.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ This feature tracks `asm!` and `global_asm!` support for the following architect
3535
| PowerPC | `reg_nonzero` | `r[3-12]`, `r[14-28]` | `b` |
3636
| PowerPC | `freg` | `f[0-31]` | `f` |
3737
| PowerPC | `vreg` | `v[0-31]` | `v` |
38+
| PowerPC | `vsreg` | `vs[0-63]` | `vs` |
3839
| PowerPC | `cr` | `cr[0-7]`, `cr` | Only clobbers |
3940
| PowerPC | `ctr` | `ctr` | Only clobbers |
4041
| PowerPC | `lr` | `lr` | Only clobbers |
@@ -79,6 +80,7 @@ This feature tracks `asm!` and `global_asm!` support for the following architect
7980
| PowerPC | `freg` | None | `f32`, `f64` |
8081
| PowerPC | `vreg` | `altivec` | `i8x16`, `i16x8`, `i32x4`, `f32x4` |
8182
| PowerPC | `vreg` | `vsx` | `f32`, `f64`, `i64x2`, `f64x2` |
83+
| PowerPC | `vsreg` | `vsx` | The union of vsx and altivec vreg types |
8284
| PowerPC | `cr` | N/A | Only clobbers |
8385
| PowerPC | `ctr` | N/A | Only clobbers |
8486
| PowerPC | `lr` | N/A | Only clobbers |
@@ -185,6 +187,7 @@ This feature tracks `asm!` and `global_asm!` support for the following architect
185187
| PowerPC | `reg_nonzero` | None | `3` | None |
186188
| PowerPC | `freg` | None | `0` | None |
187189
| PowerPC | `vreg` | None | `0` | None |
190+
| PowerPC | `vsreg` | None | `0` | None |
188191
| SPARC | `reg` | None | `%o0` | None |
189192
| CSKY | `reg` | None | `r0` | None |
190193
| CSKY | `freg` | None | `f0` | None |

0 commit comments

Comments
 (0)