Skip to content

Commit 67eb05b

Browse files
[PowerPC] Add special handling for arguments that are smaller than pointer size. (#119003)
When arguments are passed in memory instead of registers, we currently load the entire pointer size even though the argument may be smaller. For example, if the pointer size is i32, then we use a load word even if the argument is only an i8. This patch truncates the loaded value and then zero- or sign-extends it, discarding the bits that are not required, to ensure that we get the correct value even if the load is larger.
1 parent bb1961e commit 67eb05b

File tree

3 files changed

+100
-59
lines changed

3 files changed

+100
-59
lines changed

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7244,6 +7244,9 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX(
72447244
MVT LocVT = VA.getLocVT();
72457245
MVT ValVT = VA.getValVT();
72467246
ISD::ArgFlagsTy Flags = Ins[VA.getValNo()].Flags;
7247+
7248+
EVT ArgVT = Ins[VA.getValNo()].ArgVT;
7249+
bool ArgSignExt = Ins[VA.getValNo()].Flags.isSExt();
72477250
// For compatibility with the AIX XL compiler, the float args in the
72487251
// parameter save area are initialized even if the argument is available
72497252
// in register. The caller is required to initialize both the register
@@ -7291,7 +7294,24 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX(
72917294
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
72927295
SDValue ArgValue =
72937296
DAG.getLoad(ValVT, dl, Chain, FIN, MachinePointerInfo());
7294-
InVals.push_back(ArgValue);
7297+
7298+
// While the ABI specifies the argument type is (sign or zero) extended
7299+
// out to register width, not all code is compliant. We truncate and
7300+
// re-extend to be more forgiving of these callers when the argument type
7301+
// is smaller than register width.
7302+
if (!ArgVT.isVector() && !ValVT.isVector() && ArgVT.isInteger() &&
7303+
ValVT.isInteger() &&
7304+
ArgVT.getScalarSizeInBits() < ValVT.getScalarSizeInBits()) {
7305+
SDValue ArgValueTrunc = DAG.getNode(
7306+
ISD::TRUNCATE, dl, ArgVT.getSimpleVT() == MVT::i1 ? MVT::i8 : ArgVT,
7307+
ArgValue);
7308+
SDValue ArgValueExt =
7309+
ArgSignExt ? DAG.getSExtOrTrunc(ArgValueTrunc, dl, ValVT)
7310+
: DAG.getZExtOrTrunc(ArgValueTrunc, dl, ValVT);
7311+
InVals.push_back(ArgValueExt);
7312+
} else {
7313+
InVals.push_back(ArgValue);
7314+
}
72957315
};
72967316

72977317
// Vector arguments to VaArg functions are passed both on the stack, and

llvm/test/CodeGen/PowerPC/aix-cc-abi-mir.ll

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1102,12 +1102,12 @@ define i64 @test_ints_stack(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6
11021102
; 32BIT-NEXT: {{ $}}
11031103
; 32BIT-NEXT: renamable $r11 = LWZ 0, %fixed-stack.0 :: (load (s32) from %fixed-stack.0)
11041104
; 32BIT-NEXT: renamable $r12 = LWZ 0, %fixed-stack.4 :: (load (s32) from %fixed-stack.4)
1105-
; 32BIT-NEXT: renamable $r0 = LWZ 0, %fixed-stack.1 :: (load (s32) from %fixed-stack.1, align 8)
1105+
; 32BIT-NEXT: renamable $r0 = LBZ 3, %fixed-stack.1 :: (load (s8) from %fixed-stack.1 + 3, basealign 4)
11061106
; 32BIT-NEXT: renamable $r31 = LWZ 4, %fixed-stack.3 :: (load (s32) from %fixed-stack.3 + 4, basealign 16)
11071107
; 32BIT-NEXT: renamable $r30 = LWZ 0, %fixed-stack.3 :: (load (s32) from %fixed-stack.3, align 16)
11081108
; 32BIT-NEXT: renamable $r29 = LWZ 0, %fixed-stack.5 :: (load (s32) from %fixed-stack.5, align 8)
1109-
; 32BIT-NEXT: renamable $r28 = LWZ 0, %fixed-stack.6 :: (load (s32) from %fixed-stack.6)
1110-
; 32BIT-NEXT: renamable $r27 = LWZ 0, %fixed-stack.7 :: (load (s32) from %fixed-stack.7, align 16)
1109+
; 32BIT-NEXT: renamable $r28 = LBZ 3, %fixed-stack.6 :: (load (s8) from %fixed-stack.6 + 3, basealign 4)
1110+
; 32BIT-NEXT: renamable $r27 = LHA 2, %fixed-stack.7 :: (load (s16) from %fixed-stack.7 + 2, basealign 4)
11111111
; 32BIT-NEXT: renamable $r26 = LWZ 4, %fixed-stack.9 :: (load (s32) from %fixed-stack.9 + 4, basealign 8)
11121112
; 32BIT-NEXT: renamable $r25 = LWZ 0, %fixed-stack.9 :: (load (s32) from %fixed-stack.9, align 8)
11131113
; 32BIT-NEXT: renamable $r3 = nsw ADD4 killed renamable $r3, killed renamable $r4
@@ -1143,13 +1143,13 @@ define i64 @test_ints_stack(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6
11431143
; 64BIT: bb.0.entry:
11441144
; 64BIT-NEXT: liveins: $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10
11451145
; 64BIT-NEXT: {{ $}}
1146-
; 64BIT-NEXT: renamable $r11 = LWZ 0, %fixed-stack.1, implicit-def $x11 :: (load (s32) from %fixed-stack.1)
1146+
; 64BIT-NEXT: renamable $r11 = LBZ 3, %fixed-stack.1, implicit-def $x11 :: (load (s8) from %fixed-stack.1 + 3, basealign 4)
11471147
; 64BIT-NEXT: renamable $x12 = LWZ8 0, %fixed-stack.4 :: (load (s32) from %fixed-stack.4)
1148-
; 64BIT-NEXT: renamable $x0 = LWA 0, %fixed-stack.0 :: (load (s32) from %fixed-stack.0)
1149-
; 64BIT-NEXT: renamable $x2 = LD 0, %fixed-stack.2 :: (load (s64) from %fixed-stack.2)
1150-
; 64BIT-NEXT: renamable $x31 = LWA 0, %fixed-stack.3 :: (load (s32) from %fixed-stack.3)
1151-
; 64BIT-NEXT: renamable $r30 = LWZ 0, %fixed-stack.5, implicit-def $x30 :: (load (s32) from %fixed-stack.5)
1152-
; 64BIT-NEXT: renamable $x29 = LWA 0, %fixed-stack.6 :: (load (s32) from %fixed-stack.6)
1148+
; 64BIT-NEXT: renamable $r0 = LBZ 3, %fixed-stack.5, implicit-def $x0 :: (load (s8) from %fixed-stack.5 + 3, basealign 4)
1149+
; 64BIT-NEXT: renamable $x2 = LWA 0, %fixed-stack.0 :: (load (s32) from %fixed-stack.0)
1150+
; 64BIT-NEXT: renamable $x31 = LD 0, %fixed-stack.2 :: (load (s64) from %fixed-stack.2)
1151+
; 64BIT-NEXT: renamable $x30 = LWA 0, %fixed-stack.3 :: (load (s32) from %fixed-stack.3)
1152+
; 64BIT-NEXT: renamable $x29 = LHA8 2, %fixed-stack.6
11531153
; 64BIT-NEXT: renamable $x28 = LD 0, %fixed-stack.7 :: (load (s64) from %fixed-stack.7, align 16)
11541154
; 64BIT-NEXT: renamable $r3 = nsw ADD4 renamable $r3, renamable $r4, implicit killed $x4, implicit killed $x3
11551155
; 64BIT-NEXT: renamable $r3 = nsw ADD4 killed renamable $r3, renamable $r5, implicit killed $x5
@@ -1161,12 +1161,12 @@ define i64 @test_ints_stack(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6
11611161
; 64BIT-NEXT: renamable $x3 = EXTSW_32_64 killed renamable $r3
11621162
; 64BIT-NEXT: renamable $x3 = nsw ADD8 killed renamable $x3, killed renamable $x28
11631163
; 64BIT-NEXT: renamable $x3 = nsw ADD8 killed renamable $x3, killed renamable $x29
1164-
; 64BIT-NEXT: renamable $x3 = nsw ADD8 killed renamable $x3, killed renamable $x30
1164+
; 64BIT-NEXT: renamable $x3 = nsw ADD8 killed renamable $x3, killed renamable $x0
11651165
; 64BIT-NEXT: renamable $x3 = nsw ADD8 killed renamable $x3, killed renamable $x12
1166+
; 64BIT-NEXT: renamable $x3 = nsw ADD8 killed renamable $x3, killed renamable $x30
11661167
; 64BIT-NEXT: renamable $x3 = nsw ADD8 killed renamable $x3, killed renamable $x31
1167-
; 64BIT-NEXT: renamable $x3 = nsw ADD8 killed renamable $x3, killed renamable $x2
11681168
; 64BIT-NEXT: renamable $x3 = nsw ADD8 killed renamable $x3, killed renamable $x11
1169-
; 64BIT-NEXT: renamable $x3 = nsw ADD8 killed renamable $x3, killed renamable $x0
1169+
; 64BIT-NEXT: renamable $x3 = nsw ADD8 killed renamable $x3, killed renamable $x2
11701170
; 64BIT-NEXT: BLR8 implicit $lr8, implicit $rm, implicit $x3
11711171
entry:
11721172
%add = add nsw i32 %i1, %i2
@@ -1611,8 +1611,8 @@ define i32 @mix_callee(double %d1, double %d2, double %d3, double %d4, i8 zeroex
16111611
; 32BIT-NEXT: liveins: $f1, $f2, $f3, $f4
16121612
; 32BIT-NEXT: {{ $}}
16131613
; 32BIT-NEXT: renamable $r3 = LWZ 0, %fixed-stack.3 :: (load (s32) from %fixed-stack.3)
1614-
; 32BIT-NEXT: renamable $r4 = LWZ 0, %fixed-stack.5 :: (load (s32) from %fixed-stack.5)
1615-
; 32BIT-NEXT: renamable $r5 = LWZ 0, %fixed-stack.6 :: (load (s32) from %fixed-stack.6, align 8)
1614+
; 32BIT-NEXT: renamable $r4 = LHA 2, %fixed-stack.5 :: (load (s16) from %fixed-stack.5 + 2, basealign 4)
1615+
; 32BIT-NEXT: renamable $r5 = LBZ 3, %fixed-stack.6 :: (load (s8) from %fixed-stack.6 + 3, basealign 4)
16161616
; 32BIT-NEXT: renamable $r6 = LWZ 0, %fixed-stack.2 :: (load (s32) from %fixed-stack.2, align 8)
16171617
; 32BIT-NEXT: renamable $r7 = LIS 17200
16181618
; 32BIT-NEXT: STW killed renamable $r7, 0, %stack.1 :: (store (s32) into %stack.1, align 8)

llvm/test/CodeGen/PowerPC/aix-cc-abi.ll

Lines changed: 65 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -1181,78 +1181,95 @@ entry:
11811181

11821182
declare void @test_stackarg_float3(i32, i32, i32, i32, i32, i32, i32, ...)
11831183

1184-
define i64 @test_ints_stack(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i64 %ll9, i16 signext %s10, i8 zeroext %c11, i32 %ui12, i32 %si13, i64 %ll14, i8 zeroext %uc15, i32 %i16) {
1184+
define i64 @test_ints_stack(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i64 %ll9, i16 signext %s10, i8 zeroext %c11, i32 %ui12, i32 %si13, i64 %ll14, i8 zeroext %uc15, i32 %i16, i8 signext %si8, i1 zeroext %zi1) {
11851185
; ASM32PWR4-LABEL: test_ints_stack:
11861186
; ASM32PWR4: # %bb.0: # %entry
11871187
; ASM32PWR4-NEXT: add 3, 3, 4
1188-
; ASM32PWR4-NEXT: lwz 11, 92(1)
1188+
; ASM32PWR4-NEXT: stw 31, -4(1) # 4-byte Folded Spill
11891189
; ASM32PWR4-NEXT: add 3, 3, 5
11901190
; ASM32PWR4-NEXT: add 3, 3, 6
11911191
; ASM32PWR4-NEXT: add 3, 3, 7
1192-
; ASM32PWR4-NEXT: lwz 12, 76(1)
1192+
; ASM32PWR4-NEXT: lbz 12, 99(1)
11931193
; ASM32PWR4-NEXT: add 3, 3, 8
11941194
; ASM32PWR4-NEXT: add 3, 3, 9
1195-
; ASM32PWR4-NEXT: lwz 6, 60(1)
1195+
; ASM32PWR4-NEXT: lwz 0, 92(1)
11961196
; ASM32PWR4-NEXT: add 3, 3, 10
1197-
; ASM32PWR4-NEXT: srawi 5, 11, 31
1197+
; ASM32PWR4-NEXT: extsb 4, 12
11981198
; ASM32PWR4-NEXT: srawi 8, 3, 31
1199-
; ASM32PWR4-NEXT: lwz 4, 64(1)
1199+
; ASM32PWR4-NEXT: lwz 31, 76(1)
1200+
; ASM32PWR4-NEXT: srawi 12, 0, 31
1201+
; ASM32PWR4-NEXT: lwz 6, 60(1)
1202+
; ASM32PWR4-NEXT: lha 11, 66(1)
12001203
; ASM32PWR4-NEXT: lwz 7, 56(1)
1201-
; ASM32PWR4-NEXT: stw 31, -4(1) # 4-byte Folded Spill
1202-
; ASM32PWR4-NEXT: srawi 31, 12, 31
1204+
; ASM32PWR4-NEXT: stw 30, -8(1) # 4-byte Folded Spill
1205+
; ASM32PWR4-NEXT: srawi 30, 31, 31
12031206
; ASM32PWR4-NEXT: addc 3, 3, 6
12041207
; ASM32PWR4-NEXT: adde 7, 8, 7
1205-
; ASM32PWR4-NEXT: lwz 6, 68(1)
1206-
; ASM32PWR4-NEXT: srawi 8, 4, 31
1207-
; ASM32PWR4-NEXT: addc 3, 3, 4
1208+
; ASM32PWR4-NEXT: lbz 6, 71(1)
1209+
; ASM32PWR4-NEXT: srawi 8, 11, 31
1210+
; ASM32PWR4-NEXT: addc 3, 3, 11
12081211
; ASM32PWR4-NEXT: adde 7, 7, 8
1209-
; ASM32PWR4-NEXT: lwz 4, 72(1)
1212+
; ASM32PWR4-NEXT: lwz 9, 72(1)
12101213
; ASM32PWR4-NEXT: addc 3, 3, 6
12111214
; ASM32PWR4-NEXT: addze 6, 7
1212-
; ASM32PWR4-NEXT: addc 3, 3, 4
1213-
; ASM32PWR4-NEXT: lwz 0, 84(1)
1214-
; ASM32PWR4-NEXT: addze 4, 6
1215-
; ASM32PWR4-NEXT: addc 3, 3, 12
1215+
; ASM32PWR4-NEXT: addc 3, 3, 9
1216+
; ASM32PWR4-NEXT: lwz 5, 84(1)
1217+
; ASM32PWR4-NEXT: addze 6, 6
1218+
; ASM32PWR4-NEXT: addc 3, 3, 31
12161219
; ASM32PWR4-NEXT: lwz 7, 80(1)
1217-
; ASM32PWR4-NEXT: adde 4, 4, 31
1220+
; ASM32PWR4-NEXT: adde 6, 6, 30
1221+
; ASM32PWR4-NEXT: addc 3, 3, 5
1222+
; ASM32PWR4-NEXT: lbz 8, 91(1)
1223+
; ASM32PWR4-NEXT: adde 5, 6, 7
1224+
; ASM32PWR4-NEXT: addc 3, 3, 8
1225+
; ASM32PWR4-NEXT: lbz 6, 103(1)
1226+
; ASM32PWR4-NEXT: addze 5, 5
12181227
; ASM32PWR4-NEXT: addc 3, 3, 0
1219-
; ASM32PWR4-NEXT: lwz 6, 88(1)
1220-
; ASM32PWR4-NEXT: adde 4, 4, 7
1221-
; ASM32PWR4-NEXT: addc 3, 3, 6
1228+
; ASM32PWR4-NEXT: adde 5, 5, 12
12221229
; ASM32PWR4-NEXT: lwz 31, -4(1) # 4-byte Folded Reload
1223-
; ASM32PWR4-NEXT: addze 6, 4
1224-
; ASM32PWR4-NEXT: addc 4, 3, 11
1225-
; ASM32PWR4-NEXT: adde 3, 6, 5
1230+
; ASM32PWR4-NEXT: srawi 7, 4, 31
1231+
; ASM32PWR4-NEXT: addc 3, 3, 4
1232+
; ASM32PWR4-NEXT: adde 5, 5, 7
1233+
; ASM32PWR4-NEXT: lwz 30, -8(1) # 4-byte Folded Reload
1234+
; ASM32PWR4-NEXT: addc 4, 3, 6
1235+
; ASM32PWR4-NEXT: addze 3, 5
12261236
; ASM32PWR4-NEXT: blr
12271237
;
12281238
; ASM64PWR4-LABEL: test_ints_stack:
12291239
; ASM64PWR4: # %bb.0: # %entry
12301240
; ASM64PWR4-NEXT: add 3, 3, 4
1231-
; ASM64PWR4-NEXT: ld 4, 112(1)
1241+
; ASM64PWR4-NEXT: std 31, -8(1) # 8-byte Folded Spill
12321242
; ASM64PWR4-NEXT: add 3, 3, 5
12331243
; ASM64PWR4-NEXT: add 3, 3, 6
12341244
; ASM64PWR4-NEXT: add 3, 3, 7
1235-
; ASM64PWR4-NEXT: lwa 12, 124(1)
1245+
; ASM64PWR4-NEXT: std 2, -16(1) # 8-byte Folded Spill
12361246
; ASM64PWR4-NEXT: add 3, 3, 8
12371247
; ASM64PWR4-NEXT: add 3, 3, 9
1248+
; ASM64PWR4-NEXT: ld 6, 112(1)
12381249
; ASM64PWR4-NEXT: add 3, 3, 10
12391250
; ASM64PWR4-NEXT: extsw 3, 3
1240-
; ASM64PWR4-NEXT: lwz 5, 132(1)
1241-
; ASM64PWR4-NEXT: add 3, 3, 4
1251+
; ASM64PWR4-NEXT: lha 0, 126(1)
1252+
; ASM64PWR4-NEXT: add 3, 3, 6
1253+
; ASM64PWR4-NEXT: add 3, 3, 0
1254+
; ASM64PWR4-NEXT: lbz 5, 135(1)
1255+
; ASM64PWR4-NEXT: lwz 7, 140(1)
1256+
; ASM64PWR4-NEXT: add 3, 3, 5
1257+
; ASM64PWR4-NEXT: lwa 12, 148(1)
1258+
; ASM64PWR4-NEXT: add 3, 3, 7
12421259
; ASM64PWR4-NEXT: add 3, 3, 12
1243-
; ASM64PWR4-NEXT: std 2, -8(1) # 8-byte Folded Spill
1260+
; ASM64PWR4-NEXT: ld 31, 152(1)
1261+
; ASM64PWR4-NEXT: lbz 5, 167(1)
1262+
; ASM64PWR4-NEXT: add 3, 3, 31
1263+
; ASM64PWR4-NEXT: lwa 11, 172(1)
12441264
; ASM64PWR4-NEXT: add 3, 3, 5
1245-
; ASM64PWR4-NEXT: lwz 2, 140(1)
1246-
; ASM64PWR4-NEXT: lwa 11, 148(1)
1247-
; ASM64PWR4-NEXT: add 3, 3, 2
12481265
; ASM64PWR4-NEXT: add 3, 3, 11
1249-
; ASM64PWR4-NEXT: ld 4, 152(1)
1250-
; ASM64PWR4-NEXT: lwz 0, 164(1)
1266+
; ASM64PWR4-NEXT: lbz 2, 183(1)
1267+
; ASM64PWR4-NEXT: lbz 6, 191(1)
1268+
; ASM64PWR4-NEXT: extsb 4, 2
12511269
; ASM64PWR4-NEXT: add 3, 3, 4
1252-
; ASM64PWR4-NEXT: lwa 5, 172(1)
1253-
; ASM64PWR4-NEXT: add 3, 3, 0
1254-
; ASM64PWR4-NEXT: add 3, 3, 5
1255-
; ASM64PWR4-NEXT: ld 2, -8(1) # 8-byte Folded Reload
1270+
; ASM64PWR4-NEXT: add 3, 3, 6
1271+
; ASM64PWR4-NEXT: ld 2, -16(1) # 8-byte Folded Reload
1272+
; ASM64PWR4-NEXT: ld 31, -8(1) # 8-byte Folded Reload
12561273
; ASM64PWR4-NEXT: blr
12571274
entry:
12581275
%add = add nsw i32 %i1, %i2
@@ -1277,7 +1294,11 @@ entry:
12771294
%add18 = add nsw i64 %add16, %conv17
12781295
%conv19 = sext i32 %i16 to i64
12791296
%add20 = add nsw i64 %add18, %conv19
1280-
ret i64 %add20
1297+
%conv21 = sext i8 %si8 to i64
1298+
%add22 = add nsw i64 %add20, %conv21
1299+
%conv23 = zext i1 %zi1 to i64
1300+
%add24 = add nsw i64 %add22, %conv23
1301+
ret i64 %add24
12811302
}
12821303

12831304
@ll1 = common global i64 0, align 8
@@ -1720,17 +1741,17 @@ entry:
17201741
define i32 @mix_callee(double %d1, double %d2, double %d3, double %d4, i8 zeroext %c1, i16 signext %s1, i64 %ll1, i32 %i1, i32 %i2, i32 %i3) {
17211742
; ASM32PWR4-LABEL: mix_callee:
17221743
; ASM32PWR4: # %bb.0: # %entry
1723-
; ASM32PWR4-NEXT: lwz 4, 60(1)
1744+
; ASM32PWR4-NEXT: lha 3, 62(1)
17241745
; ASM32PWR4-NEXT: lis 8, 17200
17251746
; ASM32PWR4-NEXT: fadd 1, 1, 2
17261747
; ASM32PWR4-NEXT: fadd 1, 1, 3
1727-
; ASM32PWR4-NEXT: lwz 5, 56(1)
1728-
; ASM32PWR4-NEXT: lwz 3, 68(1)
1729-
; ASM32PWR4-NEXT: add 4, 5, 4
1730-
; ASM32PWR4-NEXT: lwz 5, L..C34(2) # %const.0
1748+
; ASM32PWR4-NEXT: lbz 5, 59(1)
17311749
; ASM32PWR4-NEXT: fadd 1, 1, 4
1750+
; ASM32PWR4-NEXT: lwz 4, 68(1)
1751+
; ASM32PWR4-NEXT: add 3, 5, 3
1752+
; ASM32PWR4-NEXT: lwz 5, L..C34(2) # %const.0
17321753
; ASM32PWR4-NEXT: lwz 6, 72(1)
1733-
; ASM32PWR4-NEXT: add 3, 4, 3
1754+
; ASM32PWR4-NEXT: add 3, 3, 4
17341755
; ASM32PWR4-NEXT: lwz 7, 76(1)
17351756
; ASM32PWR4-NEXT: add 3, 3, 6
17361757
; ASM32PWR4-NEXT: stw 8, -16(1)

0 commit comments

Comments
 (0)