Skip to content

Commit 416faa8

Browse files
Merge branch 'main' into fp16-fptrunc-fpext-lowering
2 parents c172895 + 728e925 commit 416faa8

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

42 files changed

+665
-567
lines changed

clang/lib/Sema/SemaExpr.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15944,6 +15944,20 @@ ExprResult Sema::CreateBuiltinUnaryOp(SourceLocation OpLoc,
1594415944
return ExprError(Diag(OpLoc, diag::err_typecheck_unary_expr)
1594515945
<< resultType << Input.get()->getSourceRange());
1594615946
}
15947+
} else if (Context.getLangOpts().HLSL && resultType->isVectorType() &&
15948+
!resultType->hasBooleanRepresentation()) {
15949+
// HLSL unary logical 'not' behaves like C++, which states that the
15950+
// operand is converted to bool and the result is bool; however, HLSL
15951+
// extends this property to vectors.
15952+
const VectorType *VTy = resultType->castAs<VectorType>();
15953+
resultType =
15954+
Context.getExtVectorType(Context.BoolTy, VTy->getNumElements());
15955+
15956+
Input = ImpCastExprToType(
15957+
Input.get(), resultType,
15958+
ScalarTypeToBooleanCastKind(VTy->getElementType()))
15959+
.get();
15960+
break;
1594715961
} else if (resultType->isExtVectorType()) {
1594815962
if (Context.getLangOpts().OpenCL &&
1594915963
Context.getLangOpts().getOpenCLCompatibleVersion() < 120) {
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.6-library -disable-llvm-passes -emit-llvm -finclude-default-header -fnative-half-type -o - %s | FileCheck %s
2+
3+
// CHECK-LABEL: case1
4+
// CHECK: [[ToBool:%.*]] = icmp ne <2 x i32> {{.*}}, zeroinitializer
5+
// CHECK-NEXT: [[BoolCmp:%.*]] = icmp eq <2 x i1> [[ToBool]], zeroinitializer
6+
// CHECK-NEXT: {{.*}} = zext <2 x i1> [[BoolCmp]] to <2 x i32>
7+
export uint32_t2 case1(uint32_t2 b) {
8+
return !b;
9+
}
10+
11+
// CHECK-LABEL: case2
12+
// CHECK: [[ToBool:%.*]] = icmp ne <3 x i32> {{.*}}, zeroinitializer
13+
// CHECK-NEXT: [[BoolCmp:%.*]] = icmp eq <3 x i1> [[ToBool]], zeroinitializer
14+
// CHECK-NEXT: {{.*}} = zext <3 x i1> [[BoolCmp]] to <3 x i32>
15+
export int32_t3 case2(int32_t3 b) {
16+
return !b;
17+
}
18+
19+
// CHECK-LABEL: case3
20+
// CHECK: [[ToBool:%.*]] = fcmp reassoc nnan ninf nsz arcp afn une half {{.*}}, 0xH0000
21+
// CHECK-NEXT: [[BoolCmp:%.*]] = xor i1 [[ToBool]], true
22+
// CHECK-NEXT: {{.*}} = uitofp i1 [[BoolCmp]] to half
23+
export float16_t case3(float16_t b) {
24+
return !b;
25+
}
26+
27+
// CHECK-LABEL: case4
28+
// CHECK: [[ToBool:%.*]] = fcmp reassoc nnan ninf nsz arcp afn une <4 x float> {{.*}}, zeroinitializer
29+
// CHECK-NEXT: [[BoolCmp:%.*]] = icmp eq <4 x i1> [[ToBool]], zeroinitializer
30+
// CHECK-NEXT: {{.*}} = uitofp <4 x i1> [[BoolCmp]] to <4 x float>
31+
export float4 case4(float4 b) {
32+
return !b;
33+
}
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -ast-dump -ast-dump-filter=case | FileCheck %s
2+
3+
// CHECK-LABEL: FunctionDecl {{.*}} used case1 'uint32_t2 (uint32_t2)'
4+
// CHECK-NEXT: ParmVarDecl {{.*}} used b 'uint32_t2':'vector<uint32_t, 2>'
5+
// CHECK-NEXT: CompoundStmt
6+
// CHECK-NEXT: ReturnStmt
7+
// CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector<uint32_t, 2>' <IntegralCast>
8+
// CHECK-NEXT: UnaryOperator {{.*}} 'vector<bool, 2>' prefix '!' cannot overflow
9+
// CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector<bool, 2>' <IntegralToBoolean>
10+
// CHECK-NEXT: ImplicitCastExpr {{.*}} 'uint32_t2':'vector<uint32_t, 2>' <LValueToRValue>
11+
// CHECK-NEXT: DeclRefExpr {{.*}} 'uint32_t2':'vector<uint32_t, 2>' lvalue ParmVar {{.*}} 'b' 'uint32_t2':'vector<uint32_t, 2>'
12+
export uint32_t2 case1(uint32_t2 b) {
13+
return !b;
14+
}
15+
16+
// CHECK-LABEL: FunctionDecl {{.*}} used case2 'int32_t3 (int32_t3)'
17+
// CHECK-NEXT: ParmVarDecl {{.*}} used b 'int32_t3':'vector<int32_t, 3>'
18+
// CHECK-NEXT: CompoundStmt
19+
// CHECK-NEXT: ReturnStmt
20+
// CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector<int32_t, 3>' <IntegralCast>
21+
// CHECK-NEXT: UnaryOperator {{.*}} 'vector<bool, 3>' prefix '!' cannot overflow
22+
// CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector<bool, 3>' <IntegralToBoolean>
23+
// CHECK-NEXT: ImplicitCastExpr {{.*}} 'int32_t3':'vector<int32_t, 3>' <LValueToRValue>
24+
// CHECK-NEXT: DeclRefExpr {{.*}} 'int32_t3':'vector<int32_t, 3>' lvalue ParmVar {{.*}} 'b' 'int32_t3':'vector<int32_t, 3>'
25+
export int32_t3 case2(int32_t3 b) {
26+
return !b;
27+
}
28+
29+
// CHECK-LABEL: FunctionDecl {{.*}} used case3 'float16_t (float16_t)'
30+
// CHECK-NEXT: ParmVarDecl {{.*}} used b 'float16_t':'half'
31+
// CHECK-NEXT: CompoundStmt
32+
// CHECK-NEXT: ReturnStmt
33+
// CHECK-NEXT: ImplicitCastExpr {{.*}} 'float16_t':'half' <IntegralToFloating>
34+
// CHECK-NEXT: UnaryOperator {{.*}} 'bool' prefix '!' cannot overflow
35+
// CHECK-NEXT: ImplicitCastExpr {{.*}} 'bool' <FloatingToBoolean>
36+
// CHECK-NEXT: ImplicitCastExpr {{.*}} 'float16_t':'half' <LValueToRValue>
37+
// CHECK-NEXT: DeclRefExpr {{.*}} 'float16_t':'half' lvalue ParmVar {{.*}} 'b' 'float16_t':'half'
38+
export float16_t case3(float16_t b) {
39+
return !b;
40+
}
41+
42+
// CHECK-LABEL: FunctionDecl {{.*}} used case4 'float4 (float4)'
43+
// CHECK-NEXT: ParmVarDecl {{.*}} used b 'float4':'vector<float, 4>'
44+
// CHECK-NEXT: CompoundStmt
45+
// CHECK-NEXT: ReturnStmt
46+
// CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector<float, 4>' <IntegralToFloating>
47+
// CHECK-NEXT: UnaryOperator {{.*}} 'vector<bool, 4>' prefix '!' cannot overflow
48+
// CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector<bool, 4>' <FloatingToBoolean>
49+
// CHECK-NEXT: ImplicitCastExpr {{.*}} 'float4':'vector<float, 4>' <LValueToRValue>
50+
// CHECK-NEXT: DeclRefExpr {{.*}} 'float4':'vector<float, 4>' lvalue ParmVar {{.*}} 'b' 'float4':'vector<float, 4>'
51+
export float4 case4(float4 b) {
52+
return !b;
53+
}

llvm/docs/CodeOfConduct.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,7 @@ The current committee members are:
171171
Transparency Reports
172172
====================
173173

174+
* `July 15, 2025 <https://discourse.llvm.org/t/llvm-code-of-conduct-transparency-report-july-15-2024-july-15-2025/88622>`_
174175
* `July 15, 2024 <https://discourse.llvm.org/t/llvm-code-of-conduct-transparency-report-july-15-2023-july-15-2024/82687>`_
175176
* `July 15, 2023 <https://llvm.org/coc-reports/2023-07-15-report.html>`_
176177
* `July 15, 2022 <https://llvm.org/coc-reports/2022-07-15-report.html>`_

llvm/include/llvm/Analysis/ScalarEvolutionPatternMatch.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -256,6 +256,18 @@ m_scev_UDiv(const Op0_t &Op0, const Op1_t &Op1) {
256256
return m_scev_Binary<SCEVUDivExpr>(Op0, Op1);
257257
}
258258

259+
template <typename Op0_t, typename Op1_t>
260+
inline SCEVBinaryExpr_match<SCEVSMaxExpr, Op0_t, Op1_t>
261+
m_scev_SMax(const Op0_t &Op0, const Op1_t &Op1) {
262+
return m_scev_Binary<SCEVSMaxExpr>(Op0, Op1);
263+
}
264+
265+
template <typename Op0_t, typename Op1_t>
266+
inline SCEVBinaryExpr_match<SCEVMinMaxExpr, Op0_t, Op1_t>
267+
m_scev_MinMax(const Op0_t &Op0, const Op1_t &Op1) {
268+
return m_scev_Binary<SCEVMinMaxExpr>(Op0, Op1);
269+
}
270+
259271
/// Match unsigned remainder pattern.
260272
/// Matches patterns generated by getURemExpr.
261273
template <typename Op0_t, typename Op1_t> struct SCEVURem_match {

llvm/lib/Analysis/ScalarEvolution.cpp

Lines changed: 44 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -1840,19 +1840,19 @@ const SCEV *ScalarEvolution::getZeroExtendExprImpl(const SCEV *Op, Type *Ty,
18401840
// = zext((2^K * (trunc X to i{N-K}))<nuw>) to iM
18411841
// = (2^K * (zext(trunc X to i{N-K}) to iM))<nuw>.
18421842
//
1843-
if (SM->getNumOperands() == 2)
1844-
if (auto *MulLHS = dyn_cast<SCEVConstant>(SM->getOperand(0)))
1845-
if (MulLHS->getAPInt().isPowerOf2())
1846-
if (auto *TruncRHS = dyn_cast<SCEVTruncateExpr>(SM->getOperand(1))) {
1847-
int NewTruncBits = getTypeSizeInBits(TruncRHS->getType()) -
1848-
MulLHS->getAPInt().logBase2();
1849-
Type *NewTruncTy = IntegerType::get(getContext(), NewTruncBits);
1850-
return getMulExpr(
1851-
getZeroExtendExpr(MulLHS, Ty),
1852-
getZeroExtendExpr(
1853-
getTruncateExpr(TruncRHS->getOperand(), NewTruncTy), Ty),
1854-
SCEV::FlagNUW, Depth + 1);
1855-
}
1843+
const APInt *C;
1844+
const SCEV *TruncRHS;
1845+
if (match(SM,
1846+
m_scev_Mul(m_scev_APInt(C), m_scev_Trunc(m_SCEV(TruncRHS)))) &&
1847+
C->isPowerOf2()) {
1848+
int NewTruncBits =
1849+
getTypeSizeInBits(SM->getOperand(1)->getType()) - C->logBase2();
1850+
Type *NewTruncTy = IntegerType::get(getContext(), NewTruncBits);
1851+
return getMulExpr(
1852+
getZeroExtendExpr(SM->getOperand(0), Ty),
1853+
getZeroExtendExpr(getTruncateExpr(TruncRHS, NewTruncTy), Ty),
1854+
SCEV::FlagNUW, Depth + 1);
1855+
}
18561856
}
18571857

18581858
// zext(umin(x, y)) -> umin(zext(x), zext(y))
@@ -3144,20 +3144,19 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
31443144
if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
31453145
if (Ops.size() == 2) {
31463146
// C1*(C2+V) -> C1*C2 + C1*V
3147-
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[1]))
3148-
// If any of Add's ops are Adds or Muls with a constant, apply this
3149-
// transformation as well.
3150-
//
3151-
// TODO: There are some cases where this transformation is not
3152-
// profitable; for example, Add = (C0 + X) * Y + Z. Maybe the scope of
3153-
// this transformation should be narrowed down.
3154-
if (Add->getNumOperands() == 2 && containsConstantInAddMulChain(Add)) {
3155-
const SCEV *LHS = getMulExpr(LHSC, Add->getOperand(0),
3156-
SCEV::FlagAnyWrap, Depth + 1);
3157-
const SCEV *RHS = getMulExpr(LHSC, Add->getOperand(1),
3158-
SCEV::FlagAnyWrap, Depth + 1);
3159-
return getAddExpr(LHS, RHS, SCEV::FlagAnyWrap, Depth + 1);
3160-
}
3147+
// If any of Add's ops are Adds or Muls with a constant, apply this
3148+
// transformation as well.
3149+
//
3150+
// TODO: There are some cases where this transformation is not
3151+
// profitable; for example, Add = (C0 + X) * Y + Z. Maybe the scope of
3152+
// this transformation should be narrowed down.
3153+
const SCEV *Op0, *Op1;
3154+
if (match(Ops[1], m_scev_Add(m_SCEV(Op0), m_SCEV(Op1))) &&
3155+
containsConstantInAddMulChain(Ops[1])) {
3156+
const SCEV *LHS = getMulExpr(LHSC, Op0, SCEV::FlagAnyWrap, Depth + 1);
3157+
const SCEV *RHS = getMulExpr(LHSC, Op1, SCEV::FlagAnyWrap, Depth + 1);
3158+
return getAddExpr(LHS, RHS, SCEV::FlagAnyWrap, Depth + 1);
3159+
}
31613160

31623161
if (Ops[0]->isAllOnesValue()) {
31633162
// If we have a mul by -1 of an add, try distributing the -1 among the
@@ -3578,20 +3577,12 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
35783577
}
35793578

35803579
// ((-C + (C smax %x)) /u %x) evaluates to zero, for any positive constant C.
3581-
if (const auto *AE = dyn_cast<SCEVAddExpr>(LHS);
3582-
AE && AE->getNumOperands() == 2) {
3583-
if (const auto *VC = dyn_cast<SCEVConstant>(AE->getOperand(0))) {
3584-
const APInt &NegC = VC->getAPInt();
3585-
if (NegC.isNegative() && !NegC.isMinSignedValue()) {
3586-
const auto *MME = dyn_cast<SCEVSMaxExpr>(AE->getOperand(1));
3587-
if (MME && MME->getNumOperands() == 2 &&
3588-
isa<SCEVConstant>(MME->getOperand(0)) &&
3589-
cast<SCEVConstant>(MME->getOperand(0))->getAPInt() == -NegC &&
3590-
MME->getOperand(1) == RHS)
3591-
return getZero(LHS->getType());
3592-
}
3593-
}
3594-
}
3580+
const APInt *NegC, *C;
3581+
if (match(LHS,
3582+
m_scev_Add(m_scev_APInt(NegC),
3583+
m_scev_SMax(m_scev_APInt(C), m_scev_Specific(RHS)))) &&
3584+
NegC->isNegative() && !NegC->isMinSignedValue() && *C == -*NegC)
3585+
return getZero(LHS->getType());
35953586

35963587
// TODO: Generalize to handle any common factors.
35973588
// udiv (mul nuw a, vscale), (mul nuw b, vscale) --> udiv a, b
@@ -10791,19 +10782,15 @@ static bool HasSameValue(const SCEV *A, const SCEV *B) {
1079110782
}
1079210783

1079310784
static bool MatchBinarySub(const SCEV *S, const SCEV *&LHS, const SCEV *&RHS) {
10794-
const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S);
10795-
if (!Add || Add->getNumOperands() != 2)
10785+
const SCEV *Op0, *Op1;
10786+
if (!match(S, m_scev_Add(m_SCEV(Op0), m_SCEV(Op1))))
1079610787
return false;
10797-
if (auto *ME = dyn_cast<SCEVMulExpr>(Add->getOperand(0));
10798-
ME && ME->getNumOperands() == 2 && ME->getOperand(0)->isAllOnesValue()) {
10799-
LHS = Add->getOperand(1);
10800-
RHS = ME->getOperand(1);
10788+
if (match(Op0, m_scev_Mul(m_scev_AllOnes(), m_SCEV(RHS)))) {
10789+
LHS = Op1;
1080110790
return true;
1080210791
}
10803-
if (auto *ME = dyn_cast<SCEVMulExpr>(Add->getOperand(1));
10804-
ME && ME->getNumOperands() == 2 && ME->getOperand(0)->isAllOnesValue()) {
10805-
LHS = Add->getOperand(0);
10806-
RHS = ME->getOperand(1);
10792+
if (match(Op1, m_scev_Mul(m_scev_AllOnes(), m_SCEV(RHS)))) {
10793+
LHS = Op0;
1080710794
return true;
1080810795
}
1080910796
return false;
@@ -12166,13 +12153,10 @@ bool ScalarEvolution::isImpliedCondBalancedTypes(
1216612153
bool ScalarEvolution::splitBinaryAdd(const SCEV *Expr,
1216712154
const SCEV *&L, const SCEV *&R,
1216812155
SCEV::NoWrapFlags &Flags) {
12169-
const auto *AE = dyn_cast<SCEVAddExpr>(Expr);
12170-
if (!AE || AE->getNumOperands() != 2)
12156+
if (!match(Expr, m_scev_Add(m_SCEV(L), m_SCEV(R))))
1217112157
return false;
1217212158

12173-
L = AE->getOperand(0);
12174-
R = AE->getOperand(1);
12175-
Flags = AE->getNoWrapFlags();
12159+
Flags = cast<SCEVAddExpr>(Expr)->getNoWrapFlags();
1217612160
return true;
1217712161
}
1217812162

@@ -15550,19 +15534,10 @@ void ScalarEvolution::LoopGuards::collectFromBlock(
1555015534
auto IsMinMaxSCEVWithNonNegativeConstant =
1555115535
[&](const SCEV *Expr, SCEVTypes &SCTy, const SCEV *&LHS,
1555215536
const SCEV *&RHS) {
15553-
if (auto *MinMax = dyn_cast<SCEVMinMaxExpr>(Expr)) {
15554-
if (MinMax->getNumOperands() != 2)
15555-
return false;
15556-
if (auto *C = dyn_cast<SCEVConstant>(MinMax->getOperand(0))) {
15557-
if (C->getAPInt().isNegative())
15558-
return false;
15559-
SCTy = MinMax->getSCEVType();
15560-
LHS = MinMax->getOperand(0);
15561-
RHS = MinMax->getOperand(1);
15562-
return true;
15563-
}
15564-
}
15565-
return false;
15537+
const APInt *C;
15538+
SCTy = Expr->getSCEVType();
15539+
return match(Expr, m_scev_MinMax(m_SCEV(LHS), m_SCEV(RHS))) &&
15540+
match(LHS, m_scev_APInt(C)) && C->isNonNegative();
1556615541
};
1556715542

1556815543
// Return a new SCEV that modifies \p Expr to the closest number divides by

llvm/lib/IR/AutoUpgrade.cpp

Lines changed: 33 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -6041,8 +6041,7 @@ std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
60416041
Triple T(TT);
60426042
// The only data layout upgrade needed for SPIR or SPIRV is setting
60436043
// the address space of globals to 1. This does not apply to SPIRV Logical.
6044-
if (((T.isAMDGPU() && !T.isAMDGCN()) ||
6045-
(T.isSPIR() || (T.isSPIRV() && !T.isSPIRVLogical()))) &&
6044+
if ((T.isSPIR() || (T.isSPIRV() && !T.isSPIRVLogical())) &&
60466045
!DL.contains("-G") && !DL.starts_with("G")) {
60476046
return DL.empty() ? std::string("G1") : (DL + "-G1").str();
60486047
}
@@ -6055,35 +6054,43 @@ std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
60556054
return DL.str();
60566055
}
60576056

6057+
// AMDGPU data layout upgrades.
60586058
std::string Res = DL.str();
6059-
// AMDGCN data layout upgrades.
6060-
if (T.isAMDGCN()) {
6059+
if (T.isAMDGPU()) {
60616060
// Define address spaces for constants.
60626061
if (!DL.contains("-G") && !DL.starts_with("G"))
60636062
Res.append(Res.empty() ? "G1" : "-G1");
60646063

6065-
// Add missing non-integral declarations.
6066-
// This goes before adding new address spaces to prevent incoherent string
6067-
// values.
6068-
if (!DL.contains("-ni") && !DL.starts_with("ni"))
6069-
Res.append("-ni:7:8:9");
6070-
// Update ni:7 to ni:7:8:9.
6071-
if (DL.ends_with("ni:7"))
6072-
Res.append(":8:9");
6073-
if (DL.ends_with("ni:7:8"))
6074-
Res.append(":9");
6075-
6076-
// Add sizing for address spaces 7 and 8 (fat raw buffers and buffer
6077-
// resources) An empty data layout has already been upgraded to G1 by now.
6078-
if (!DL.contains("-p7") && !DL.starts_with("p7"))
6079-
Res.append("-p7:160:256:256:32");
6080-
if (!DL.contains("-p8") && !DL.starts_with("p8"))
6081-
Res.append("-p8:128:128:128:48");
6082-
constexpr StringRef OldP8("-p8:128:128-");
6083-
if (DL.contains(OldP8))
6084-
Res.replace(Res.find(OldP8), OldP8.size(), "-p8:128:128:128:48-");
6085-
if (!DL.contains("-p9") && !DL.starts_with("p9"))
6086-
Res.append("-p9:192:256:256:32");
6064+
// AMDGCN data layout upgrades.
6065+
if (T.isAMDGCN()) {
6066+
6067+
// Add missing non-integral declarations.
6068+
// This goes before adding new address spaces to prevent incoherent string
6069+
// values.
6070+
if (!DL.contains("-ni") && !DL.starts_with("ni"))
6071+
Res.append("-ni:7:8:9");
6072+
// Update ni:7 to ni:7:8:9.
6073+
if (DL.ends_with("ni:7"))
6074+
Res.append(":8:9");
6075+
if (DL.ends_with("ni:7:8"))
6076+
Res.append(":9");
6077+
6078+
// Add sizing for address spaces 7 and 8 (fat raw buffers and buffer
6079+
// resources). An empty data layout has already been upgraded to G1 by now.
6080+
if (!DL.contains("-p7") && !DL.starts_with("p7"))
6081+
Res.append("-p7:160:256:256:32");
6082+
if (!DL.contains("-p8") && !DL.starts_with("p8"))
6083+
Res.append("-p8:128:128:128:48");
6084+
constexpr StringRef OldP8("-p8:128:128-");
6085+
if (DL.contains(OldP8))
6086+
Res.replace(Res.find(OldP8), OldP8.size(), "-p8:128:128:128:48-");
6087+
if (!DL.contains("-p9") && !DL.starts_with("p9"))
6088+
Res.append("-p9:192:256:256:32");
6089+
}
6090+
6091+
// Upgrade the ELF mangling mode.
6092+
if (!DL.contains("m:e"))
6093+
Res = Res.empty() ? "m:e" : "m:e-" + Res;
60876094

60886095
return Res;
60896096
}

llvm/lib/Target/X86/X86MCInstLower.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1970,6 +1970,8 @@ static void addConstantComments(const MachineInstr *MI,
19701970
}
19711971

19721972
CASE_ARITH_RM(PMADDWD)
1973+
CASE_ARITH_RM(PMULDQ)
1974+
CASE_ARITH_RM(PMULUDQ)
19731975
CASE_ARITH_RM(PMULLD)
19741976
CASE_AVX512_ARITH_RM(PMULLQ)
19751977
CASE_ARITH_RM(PMULLW)

llvm/test/CodeGen/X86/combine-multiplies.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -142,9 +142,9 @@ define void @testCombineMultiplies_non_splat(<4 x i32> %v1) nounwind {
142142
; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [11,22,33,44]
143143
; CHECK-NEXT: paddd %xmm0, %xmm1
144144
; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
145-
; CHECK-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
145+
; CHECK-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [22,33,44,55]
146146
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
147-
; CHECK-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2
147+
; CHECK-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2 # [33,u,55,u]
148148
; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
149149
; CHECK-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
150150
; CHECK-NEXT: movdqa {{.*#+}} xmm2 = [242,726,1452,2420]

0 commit comments

Comments
 (0)