Skip to content

Commit 75c7d99

Browse files
committed
Merge branch 'upstream' into x86-concat-vpermv3
2 parents b5e3a69 + 6c5941b commit 75c7d99

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

47 files changed

+5564
-61
lines changed

clang/docs/Modules.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,7 @@ first include path that would refer to the current file. ``#include_next`` is
152152
interpreted as if the current file had been found in that path.
153153
If this search finds a file named by a module map, the ``#include_next``
154154
directive is translated into an import, just like for a ``#include``
155-
directive.``
155+
directive.
156156

157157
Module maps
158158
-----------

clang/test/Driver/print-supported-extensions-riscv.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,7 @@
196196
// CHECK-NEXT: xqcicm 0.2 'Xqcicm' (Qualcomm uC Conditional Move Extension)
197197
// CHECK-NEXT: xqcics 0.2 'Xqcics' (Qualcomm uC Conditional Select Extension)
198198
// CHECK-NEXT: xqcicsr 0.2 'Xqcicsr' (Qualcomm uC CSR Extension)
199+
// CHECK-NEXT: xqciint 0.2 'Xqciint' (Qualcomm uC Interrupts Extension)
199200
// CHECK-NEXT: xqcilsm 0.2 'Xqcilsm' (Qualcomm uC Load Store Multiple Extension)
200201
// CHECK-NEXT: xqcisls 0.2 'Xqcisls' (Qualcomm uC Scaled Load Store Extension)
201202
// CHECK-EMPTY:

clang/test/utils/update_cc_test_checks/Inputs/c-symbol-mangling.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
// UTC_ARGS: --enable
1919

2020
#ifdef __arm__
21-
/// FIXME: UTC does not find this function, but can find all others.
2221
typedef __attribute__((neon_vector_type(8))) __INT8_TYPE__ int8x8_t;
2322
int8x8_t test_vaba_s8(int8x8_t a, int8x8_t b, int8x8_t c) {
2423
return a + b + c;

clang/test/utils/update_cc_test_checks/Inputs/c-symbol-mangling.c.expected

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,22 @@
1818
// UTC_ARGS: --enable
1919

2020
#ifdef __arm__
21-
/// FIXME: UTC does not find this function, but can find all others.
2221
typedef __attribute__((neon_vector_type(8))) __INT8_TYPE__ int8x8_t;
22+
// THUMB-DARWIN-LABEL: @test_vaba_s8(
23+
// THUMB-DARWIN-NEXT: entry:
24+
// THUMB-DARWIN-NEXT: [[A_ADDR:%.*]] = alloca <8 x i8>, align 8
25+
// THUMB-DARWIN-NEXT: [[B_ADDR:%.*]] = alloca <8 x i8>, align 8
26+
// THUMB-DARWIN-NEXT: [[C_ADDR:%.*]] = alloca <8 x i8>, align 8
27+
// THUMB-DARWIN-NEXT: store <8 x i8> [[A:%.*]], ptr [[A_ADDR]], align 8
28+
// THUMB-DARWIN-NEXT: store <8 x i8> [[B:%.*]], ptr [[B_ADDR]], align 8
29+
// THUMB-DARWIN-NEXT: store <8 x i8> [[C:%.*]], ptr [[C_ADDR]], align 8
30+
// THUMB-DARWIN-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[A_ADDR]], align 8
31+
// THUMB-DARWIN-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[B_ADDR]], align 8
32+
// THUMB-DARWIN-NEXT: [[ADD:%.*]] = add <8 x i8> [[TMP0]], [[TMP1]]
33+
// THUMB-DARWIN-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[C_ADDR]], align 8
34+
// THUMB-DARWIN-NEXT: [[ADD1:%.*]] = add <8 x i8> [[ADD]], [[TMP2]]
35+
// THUMB-DARWIN-NEXT: ret <8 x i8> [[ADD1]]
36+
//
2337
int8x8_t test_vaba_s8(int8x8_t a, int8x8_t b, int8x8_t c) {
2438
return a + b + c;
2539
}

llvm/docs/RISCVUsage.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -447,6 +447,9 @@ The current vendor extensions supported are:
447447
``experimental-Xqcicsr``
448448
LLVM implements `version 0.2 of the Qualcomm uC CSR extension specification <https://github.com/quic/riscv-unified-db/releases/latest>`__ by Qualcomm. All instructions are prefixed with `qc.` as described in the specification. These instructions are only available for riscv32.
449449

450+
``experimental-Xqciint``
451+
LLVM implements `version 0.2 of the Qualcomm uC Interrupts extension specification <https://github.com/quic/riscv-unified-db/releases/latest>`__ by Qualcomm. All instructions are prefixed with `qc.` as described in the specification. These instructions are only available for riscv32.
452+
450453
``experimental-Xqcilsm``
451454
LLVM implements `version 0.2 of the Qualcomm uC Load Store Multiple extension specification <https://github.com/quic/riscv-unified-db/releases/latest>`__ by Qualcomm. All instructions are prefixed with `qc.` as described in the specification. These instructions are only available for riscv32.
452455

llvm/docs/ReleaseNotes.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,8 @@ Changes to the RISC-V Backend
235235
extension.
236236
* Adds experimental assembler support for the Qualcomm uC 'Xqcicm' (Conditional Move)
237237
extension.
238+
* Adds experimental assembler support for the Qualcomm uC 'Xqciint' (Interrupts)
239+
extension.
238240
* Added ``Sdext`` and ``Sdtrig`` extensions.
239241

240242
Changes to the WebAssembly Backend

llvm/include/llvm/Analysis/TargetTransformInfo.h

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,12 @@ typedef TargetTransformInfo TTI;
211211
/// for IR-level transformations.
212212
class TargetTransformInfo {
213213
public:
214+
enum PartialReductionExtendKind { PR_None, PR_SignExtend, PR_ZeroExtend };
215+
216+
/// Get the kind of extension that an instruction represents.
217+
static PartialReductionExtendKind
218+
getPartialReductionExtendKind(Instruction *I);
219+
214220
/// Construct a TTI object using a type implementing the \c Concept
215221
/// API below.
216222
///
@@ -1280,6 +1286,20 @@ class TargetTransformInfo {
12801286
/// \return true if the target wants to issue a prefetch in address space \p AS.
12811287
bool shouldPrefetchAddressSpace(unsigned AS) const;
12821288

1289+
/// \return The cost of a partial reduction, which is a reduction from a
1290+
/// vector to another vector with fewer elements of larger size. They are
1291+
/// represented by the llvm.experimental.partial.reduce.add intrinsic, which
1292+
/// takes an accumulator and a binary operation operand that itself is fed by
1293+
/// two extends. An example of an operation that uses a partial reduction is a
1294+
/// dot product, which reduces two vectors into one vector with 4 times fewer
1295+
/// elements, each 4 times wider.
1296+
InstructionCost
1297+
getPartialReductionCost(unsigned Opcode, Type *InputTypeA, Type *InputTypeB,
1298+
Type *AccumType, ElementCount VF,
1299+
PartialReductionExtendKind OpAExtend,
1300+
PartialReductionExtendKind OpBExtend,
1301+
std::optional<unsigned> BinOp = std::nullopt) const;
1302+
12831303
/// \return The maximum interleave factor that any transform should try to
12841304
/// perform for this target. This number depends on the level of parallelism
12851305
/// and the number of execution units in the CPU.
@@ -2107,6 +2127,20 @@ class TargetTransformInfo::Concept {
21072127
/// \return true if the target wants to issue a prefetch in address space \p AS.
21082128
virtual bool shouldPrefetchAddressSpace(unsigned AS) const = 0;
21092129

2130+
/// \return The cost of a partial reduction, which is a reduction from a
2131+
/// vector to another vector with fewer elements of larger size. They are
2132+
/// represented by the llvm.experimental.partial.reduce.add intrinsic, which
2133+
/// takes an accumulator and a binary operation operand that itself is fed by
2134+
/// two extends. An example of an operation that uses a partial reduction is a
2135+
/// dot product, which reduces two vectors into one vector with 4 times fewer
2136+
/// elements, each 4 times wider.
2137+
virtual InstructionCost
2138+
getPartialReductionCost(unsigned Opcode, Type *InputTypeA, Type *InputTypeB,
2139+
Type *AccumType, ElementCount VF,
2140+
PartialReductionExtendKind OpAExtend,
2141+
PartialReductionExtendKind OpBExtend,
2142+
std::optional<unsigned> BinOp) const = 0;
2143+
21102144
virtual unsigned getMaxInterleaveFactor(ElementCount VF) = 0;
21112145
virtual InstructionCost getArithmeticInstrCost(
21122146
unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
@@ -2786,6 +2820,16 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
27862820
return Impl.shouldPrefetchAddressSpace(AS);
27872821
}
27882822

2823+
InstructionCost getPartialReductionCost(
2824+
unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType,
2825+
ElementCount VF, PartialReductionExtendKind OpAExtend,
2826+
PartialReductionExtendKind OpBExtend,
2827+
std::optional<unsigned> BinOp = std::nullopt) const override {
2828+
return Impl.getPartialReductionCost(Opcode, InputTypeA, InputTypeB,
2829+
AccumType, VF, OpAExtend, OpBExtend,
2830+
BinOp);
2831+
}
2832+
27892833
unsigned getMaxInterleaveFactor(ElementCount VF) override {
27902834
return Impl.getMaxInterleaveFactor(VF);
27912835
}

llvm/include/llvm/Analysis/TargetTransformInfoImpl.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -585,6 +585,15 @@ class TargetTransformInfoImplBase {
585585
bool enableWritePrefetching() const { return false; }
586586
bool shouldPrefetchAddressSpace(unsigned AS) const { return !AS; }
587587

588+
InstructionCost
589+
getPartialReductionCost(unsigned Opcode, Type *InputTypeA, Type *InputTypeB,
590+
Type *AccumType, ElementCount VF,
591+
TTI::PartialReductionExtendKind OpAExtend,
592+
TTI::PartialReductionExtendKind OpBExtend,
593+
std::optional<unsigned> BinOp = std::nullopt) const {
594+
return InstructionCost::getInvalid();
595+
}
596+
588597
unsigned getMaxInterleaveFactor(ElementCount VF) const { return 1; }
589598

590599
InstructionCost getArithmeticInstrCost(

llvm/lib/Analysis/TargetTransformInfo.cpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -863,6 +863,15 @@ bool TargetTransformInfo::shouldPrefetchAddressSpace(unsigned AS) const {
863863
return TTIImpl->shouldPrefetchAddressSpace(AS);
864864
}
865865

866+
InstructionCost TargetTransformInfo::getPartialReductionCost(
867+
unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType,
868+
ElementCount VF, PartialReductionExtendKind OpAExtend,
869+
PartialReductionExtendKind OpBExtend, std::optional<unsigned> BinOp) const {
870+
return TTIImpl->getPartialReductionCost(Opcode, InputTypeA, InputTypeB,
871+
AccumType, VF, OpAExtend, OpBExtend,
872+
BinOp);
873+
}
874+
866875
unsigned TargetTransformInfo::getMaxInterleaveFactor(ElementCount VF) const {
867876
return TTIImpl->getMaxInterleaveFactor(VF);
868877
}
@@ -974,6 +983,15 @@ InstructionCost TargetTransformInfo::getShuffleCost(
974983
return Cost;
975984
}
976985

986+
TargetTransformInfo::PartialReductionExtendKind
987+
TargetTransformInfo::getPartialReductionExtendKind(Instruction *I) {
988+
if (isa<SExtInst>(I))
989+
return PR_SignExtend;
990+
if (isa<ZExtInst>(I))
991+
return PR_ZeroExtend;
992+
return PR_None;
993+
}
994+
977995
TTI::CastContextHint
978996
TargetTransformInfo::getCastContextHint(const Instruction *I) {
979997
if (!I)

llvm/lib/TableGen/TGLexer.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -814,7 +814,7 @@ tgtok::TokKind TGLexer::lexPreprocessor(tgtok::TokKind Kind,
814814
if (PrepIncludeStack.back().empty())
815815
return ReturnError(TokStart, "#endif without #ifdef");
816816

817-
auto &IfdefOrElseEntry = PrepIncludeStack.back().back();
817+
[[maybe_unused]] auto &IfdefOrElseEntry = PrepIncludeStack.back().back();
818818

819819
assert((IfdefOrElseEntry.Kind == tgtok::Ifdef ||
820820
IfdefOrElseEntry.Kind == tgtok::Else) &&

0 commit comments

Comments
 (0)